diff --git a/configure b/configure index 397bc7b71c5..813deeb85e2 100755 --- a/configure +++ b/configure @@ -718,6 +718,8 @@ XSLT_PE_LIBS XSLT_PE_CFLAGS XML2_PE_LIBS XML2_PE_CFLAGS +VKD3D_PE_LIBS +VKD3D_PE_CFLAGS TIFF_PE_LIBS TIFF_PE_CFLAGS PNG_PE_LIBS @@ -1537,6 +1539,7 @@ enable_strmbase enable_strmiids enable_tiff enable_uuid +enable_vkd3d enable_wbemuuid enable_wine enable_wmcodecdspuuid @@ -1687,6 +1690,8 @@ PNG_PE_CFLAGS PNG_PE_LIBS TIFF_PE_CFLAGS TIFF_PE_LIBS +VKD3D_PE_CFLAGS +VKD3D_PE_LIBS XML2_PE_CFLAGS XML2_PE_LIBS XSLT_PE_CFLAGS @@ -2477,6 +2482,11 @@ Some influential environment variables: C compiler flags for the PE tiff, overriding the bundled version TIFF_PE_LIBS Linker flags for the PE tiff, overriding the bundled version + VKD3D_PE_CFLAGS + C compiler flags for the PE vkd3d, overriding the bundled + version + VKD3D_PE_LIBS + Linker flags for the PE vkd3d, overriding the bundled version XML2_PE_CFLAGS C compiler flags for the PE xml2, overriding the bundled version XML2_PE_LIBS @@ -13063,6 +13073,21 @@ fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: tiff cflags: $TIFF_PE_CFLAGS" >&5 printf "%s\n" "$as_me:${as_lineno-$LINENO}: tiff libs: $TIFF_PE_LIBS" >&5 +if ${VKD3D_PE_LIBS:+false} : +then : + VKD3D_PE_LIBS=vkd3d + if ${VKD3D_PE_CFLAGS:+false} : +then : + VKD3D_PE_CFLAGS="-I\$(top_srcdir)/libs/vkd3d/include" +else $as_nop + enable_vkd3d=no +fi +else $as_nop + enable_vkd3d=no +fi +printf "%s\n" "$as_me:${as_lineno-$LINENO}: vkd3d cflags: $VKD3D_PE_CFLAGS" >&5 +printf "%s\n" "$as_me:${as_lineno-$LINENO}: vkd3d libs: $VKD3D_PE_LIBS" >&5 + if ${XML2_PE_LIBS:+false} : then : XML2_PE_LIBS=xml2 @@ -22036,6 +22061,7 @@ wine_fn_config_makefile libs/strmbase enable_strmbase wine_fn_config_makefile libs/strmiids enable_strmiids wine_fn_config_makefile libs/tiff enable_tiff wine_fn_config_makefile libs/uuid enable_uuid +wine_fn_config_makefile libs/vkd3d enable_vkd3d wine_fn_config_makefile libs/wbemuuid enable_wbemuuid wine_fn_config_makefile libs/wine 
enable_wine wine_fn_config_makefile libs/wmcodecdspuuid enable_wmcodecdspuuid @@ -23151,6 +23177,8 @@ PNG_PE_CFLAGS = $PNG_PE_CFLAGS PNG_PE_LIBS = $PNG_PE_LIBS TIFF_PE_CFLAGS = $TIFF_PE_CFLAGS TIFF_PE_LIBS = $TIFF_PE_LIBS +VKD3D_PE_CFLAGS = $VKD3D_PE_CFLAGS +VKD3D_PE_LIBS = $VKD3D_PE_LIBS XML2_PE_CFLAGS = $XML2_PE_CFLAGS XML2_PE_LIBS = $XML2_PE_LIBS XSLT_PE_CFLAGS = $XSLT_PE_CFLAGS diff --git a/configure.ac b/configure.ac index a9d3ff9a85c..5baf410b8b1 100644 --- a/configure.ac +++ b/configure.ac @@ -1104,6 +1104,7 @@ WINE_EXTLIB_FLAGS(LCMS2, lcms2, lcms2, "-I\$(top_srcdir)/libs/lcms2/include") WINE_EXTLIB_FLAGS(MPG123, mpg123, mpg123, "-I\$(top_srcdir)/libs/mpg123/src/libmpg123") WINE_EXTLIB_FLAGS(PNG, png, "png \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/png") WINE_EXTLIB_FLAGS(TIFF, tiff, "tiff \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/tiff/libtiff") +WINE_EXTLIB_FLAGS(VKD3D, vkd3d, vkd3d, "-I\$(top_srcdir)/libs/vkd3d/include") WINE_EXTLIB_FLAGS(XML2, xml2, xml2, "-I\$(top_srcdir)/libs/xml2/include -DLIBXML_STATIC") WINE_EXTLIB_FLAGS(XSLT, xslt, xslt, "-I\$(top_srcdir)/libs/xslt -DLIBXSLT_STATIC") WINE_EXTLIB_FLAGS(ZLIB, zlib, z, "-I\$(top_srcdir)/libs/zlib -DFAR= -DZ_SOLO") @@ -3245,6 +3246,7 @@ WINE_CONFIG_MAKEFILE(libs/strmbase) WINE_CONFIG_MAKEFILE(libs/strmiids) WINE_CONFIG_MAKEFILE(libs/tiff) WINE_CONFIG_MAKEFILE(libs/uuid) +WINE_CONFIG_MAKEFILE(libs/vkd3d) WINE_CONFIG_MAKEFILE(libs/wbemuuid) WINE_CONFIG_MAKEFILE(libs/wine) WINE_CONFIG_MAKEFILE(libs/wmcodecdspuuid) diff --git a/libs/vkd3d/AUTHORS b/libs/vkd3d/AUTHORS new file mode 100644 index 00000000000..bc2e08f6c48 --- /dev/null +++ b/libs/vkd3d/AUTHORS @@ -0,0 +1,26 @@ +Alexandre Julliard +Andrew Eikum +Andrey Gusev +Atharva Nimbalkar +Biswapriyo Nath +Chip Davis +Conor McCarthy +Derek Lesho +Francisco Casas +Giovanni Mascellani +Hans-Kristian Arntzen +Henri Verbeet +Isabella Bosia +Jactry Zeng +Joshua Ashton +Józef Kucia +Matteo Bruni +Nikolay Sivov +Philip Rebohle +Rémi Bernon +Robin Kertels +Stefan 
Dösinger +Sven Hesse +Vinson Lee +Zebediah Figura +Zhiyi Zhang diff --git a/libs/vkd3d/COPYING b/libs/vkd3d/COPYING new file mode 100644 index 00000000000..b7aa9277f37 --- /dev/null +++ b/libs/vkd3d/COPYING @@ -0,0 +1,16 @@ +Copyright 2016-2022 the Vkd3d project authors (see the file AUTHORS for a +complete list) + +Vkd3d is free software; you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the +Free Software Foundation; either version 2.1 of the License, or (at +your option) any later version. + +This library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA, 02110-1301 USA. 
diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in new file mode 100644 index 00000000000..c912f97165e --- /dev/null +++ b/libs/vkd3d/Makefile.in @@ -0,0 +1,37 @@ +EXTLIB = libvkd3d.a +EXTRAINCL = -I$(srcdir)/include -I$(srcdir)/include/private -I$(srcdir)/libs/vkd3d -I$(srcdir)/libs/vkd3d-shader +EXTRADEFS = \ + -DWINE_NO_NAMELESS_EXTENSION \ + -DCONST_VTABLE \ + -DLIBVKD3D_SOURCE \ + -DLIBVKD3D_SHADER_SOURCE \ + -DLIBVKD3D_UTILS_SOURCE + +SOURCES = \ + libs/vkd3d-common/blob.c \ + libs/vkd3d-common/debug.c \ + libs/vkd3d-common/error.c \ + libs/vkd3d-common/memory.c \ + libs/vkd3d-common/utf8.c \ + libs/vkd3d-shader/checksum.c \ + libs/vkd3d-shader/d3dbc.c \ + libs/vkd3d-shader/dxbc.c \ + libs/vkd3d-shader/glsl.c \ + libs/vkd3d-shader/hlsl.c \ + libs/vkd3d-shader/hlsl.l \ + libs/vkd3d-shader/hlsl.y \ + libs/vkd3d-shader/hlsl_codegen.c \ + libs/vkd3d-shader/hlsl_constant_ops.c \ + libs/vkd3d-shader/hlsl_sm1.c \ + libs/vkd3d-shader/hlsl_sm4.c \ + libs/vkd3d-shader/preproc.l \ + libs/vkd3d-shader/preproc.y \ + libs/vkd3d-shader/spirv.c \ + libs/vkd3d-shader/trace.c \ + libs/vkd3d-shader/vkd3d_shader_main.c \ + libs/vkd3d/command.c \ + libs/vkd3d/device.c \ + libs/vkd3d/resource.c \ + libs/vkd3d/state.c \ + libs/vkd3d/utils.c \ + libs/vkd3d/vkd3d_main.c diff --git a/libs/vkd3d/config.h b/libs/vkd3d/config.h new file mode 100644 index 00000000000..9b6daff5ee8 --- /dev/null +++ b/libs/vkd3d/config.h @@ -0,0 +1,5 @@ +#define PACKAGE_NAME "vkd3d" +#define PACKAGE_STRING "vkd3d 1.3" +#define PACKAGE_VERSION "1.3" +#define PATH_MAX 1024 +#define SONAME_LIBVULKAN "vulkan-1.dll" diff --git a/libs/vkd3d/include/private/vkd3d_blob.h b/libs/vkd3d/include/private/vkd3d_blob.h new file mode 100644 index 00000000000..0f133aa6fe7 --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_blob.h @@ -0,0 +1,27 @@ +/* + * Copyright 2017 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser 
General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_BLOB_H +#define __VKD3D_BLOB_H + +#include "windows.h" +#include "d3dcommon.h" + +HRESULT vkd3d_blob_create(void *buffer, SIZE_T size, ID3D10Blob **blob); + +#endif /* __VKD3D_BLOB_H */ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h new file mode 100644 index 00000000000..6e57e05c7e2 --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -0,0 +1,245 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_COMMON_H +#define __VKD3D_COMMON_H + +#include "config.h" +#include "windows.h" +#include "vkd3d_types.h" + +#include +#include +#include +#include + +#ifdef _MSC_VER +#include +#endif + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#endif + +#define DIV_ROUND_UP(a, b) ((a) % (b) == 0 ? (a) / (b) : (a) / (b) + 1) + +#define STATIC_ASSERT(e) extern void __VKD3D_STATIC_ASSERT__(int [(e) ? 1 : -1]) + +#define MEMBER_SIZE(t, m) sizeof(((t *)0)->m) + +#define VKD3D_MAKE_TAG(ch0, ch1, ch2, ch3) \ + ((uint32_t)(ch0) | ((uint32_t)(ch1) << 8) \ + | ((uint32_t)(ch2) << 16) | ((uint32_t)(ch3) << 24)) + +static inline size_t align(size_t addr, size_t alignment) +{ + return (addr + (alignment - 1)) & ~(alignment - 1); +} + +#ifdef __GNUC__ +# define VKD3D_NORETURN __attribute__((noreturn)) +# define VKD3D_PRINTF_FUNC(fmt, args) __attribute__((format(printf, fmt, args))) +# define VKD3D_UNUSED __attribute__((unused)) +#else +# define VKD3D_NORETURN +# define VKD3D_PRINTF_FUNC(fmt, args) +# define VKD3D_UNUSED +#endif /* __GNUC__ */ + +static inline unsigned int vkd3d_popcount(unsigned int v) +{ +#ifdef _MSC_VER + return __popcnt(v); +#elif defined(__GNUC__) && (__GNUC__ >= 4) + return __builtin_popcount(v); +#else + v -= (v >> 1) & 0x55555555; + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + return (((v + (v >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24; +#endif +} + +static inline bool vkd3d_bitmask_is_contiguous(unsigned int mask) +{ + unsigned int i, j; + + for (i = 0, j = 0; i < sizeof(mask) * CHAR_BIT; ++i) + { + if (mask & (1u << i)) + ++j; + else if (j) + break; + } + + return vkd3d_popcount(mask) == j; +} + +/* Undefined for x == 0. 
*/ +static inline unsigned int vkd3d_log2i(unsigned int x) +{ +#ifdef _WIN32 + /* _BitScanReverse returns the index of the highest set bit, + * unlike clz which is 31 - index. */ + ULONG result; + _BitScanReverse(&result, x); + return (unsigned int)result; +#elif defined(HAVE_BUILTIN_CLZ) + return __builtin_clz(x) ^ 0x1f; +#else + static const unsigned int l[] = + { + ~0u, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + }; + unsigned int i; + + return (i = x >> 16) ? (x = i >> 8) ? l[x] + 24 + : l[i] + 16 : (i = x >> 8) ? 
l[i] + 8 : l[x]; +#endif +} + +static inline void *vkd3d_memmem( const void *haystack, size_t haystack_len, const void *needle, size_t needle_len) +{ + const char *str = haystack; + + while (haystack_len >= needle_len) + { + if (!memcmp(str, needle, needle_len)) + return (char *)str; + ++str; + --haystack_len; + } + return NULL; +} + +static inline bool vkd3d_bound_range(size_t start, size_t count, size_t limit) +{ +#ifdef HAVE_BUILTIN_ADD_OVERFLOW + size_t sum; + + return !__builtin_add_overflow(start, count, &sum) && sum <= limit; +#else + return start <= limit && count <= limit - start; +#endif +} + +static inline uint16_t vkd3d_make_u16(uint8_t low, uint8_t high) +{ + return low | ((uint16_t)high << 8); +} + +static inline uint32_t vkd3d_make_u32(uint16_t low, uint16_t high) +{ + return low | ((uint32_t)high << 16); +} + +static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) +{ + return (x > y) - (x < y); +} + +static inline int ascii_isupper(int c) +{ + return 'A' <= c && c <= 'Z'; +} + +static inline int ascii_tolower(int c) +{ + return ascii_isupper(c) ? 
c - 'A' + 'a' : c; +} + +static inline int ascii_strncasecmp(const char *a, const char *b, size_t n) +{ + int c_a, c_b; + + while (n--) + { + c_a = ascii_tolower(*a++); + c_b = ascii_tolower(*b++); + if (c_a != c_b || !c_a) + return c_a - c_b; + } + return 0; +} + +static inline int ascii_strcasecmp(const char *a, const char *b) +{ + int c_a, c_b; + + do + { + c_a = ascii_tolower(*a++); + c_b = ascii_tolower(*b++); + } while (c_a == c_b && c_a != '\0'); + + return c_a - c_b; +} + +#ifndef _WIN32 +# if HAVE_SYNC_ADD_AND_FETCH +static inline LONG InterlockedIncrement(LONG volatile *x) +{ + return __sync_add_and_fetch(x, 1); +} +static inline LONG InterlockedAdd(LONG volatile *x, LONG val) +{ + return __sync_add_and_fetch(x, val); +} +# else +# error "InterlockedIncrement() not implemented for this platform" +# endif /* HAVE_SYNC_ADD_AND_FETCH */ + +# if HAVE_SYNC_SUB_AND_FETCH +static inline LONG InterlockedDecrement(LONG volatile *x) +{ + return __sync_sub_and_fetch(x, 1); +} +# else +# error "InterlockedDecrement() not implemented for this platform" +# endif +#endif /* _WIN32 */ + +static inline void vkd3d_parse_version(const char *version, int *major, int *minor) +{ + *major = atoi(version); + + while (isdigit(*version)) + ++version; + if (*version == '.') + ++version; + + *minor = atoi(version); +} + +HRESULT hresult_from_vkd3d_result(int vkd3d_result); + +#endif /* __VKD3D_COMMON_H */ diff --git a/libs/vkd3d/include/private/vkd3d_debug.h b/libs/vkd3d/include/private/vkd3d_debug.h new file mode 100644 index 00000000000..8ab653aef2e --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_debug.h @@ -0,0 +1,118 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_DEBUG_H +#define __VKD3D_DEBUG_H + +#include "vkd3d_common.h" + +#include +#include +#include + +#ifdef VKD3D_NO_TRACE_MESSAGES +#define TRACE(args...) do { } while (0) +#define TRACE_ON() (false) +#endif + +#ifdef VKD3D_NO_DEBUG_MESSAGES +#define WARN(args...) do { } while (0) +#define FIXME(args...) do { } while (0) +#endif + +enum vkd3d_dbg_level +{ + VKD3D_DBG_LEVEL_NONE, + VKD3D_DBG_LEVEL_ERR, + VKD3D_DBG_LEVEL_FIXME, + VKD3D_DBG_LEVEL_WARN, + VKD3D_DBG_LEVEL_TRACE, +}; + +enum vkd3d_dbg_level vkd3d_dbg_get_level(void); + +void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4); + +const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2); +const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args); +const char *debugstr_a(const char *str); +const char *debugstr_an(const char *str, size_t n); +const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); + +#define VKD3D_DBG_LOG(level) \ + do { \ + const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ + VKD3D_DBG_PRINTF + +#define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ + do { \ + static bool vkd3d_dbg_next_time; \ + const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ + ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ + vkd3d_dbg_next_time = true; \ + VKD3D_DBG_PRINTF + +#define VKD3D_DBG_PRINTF(...) 
\ + vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) + +#ifndef TRACE +#define TRACE VKD3D_DBG_LOG(TRACE) +#endif + +#ifndef WARN +#define WARN VKD3D_DBG_LOG(WARN) +#endif + +#ifndef FIXME +#define FIXME VKD3D_DBG_LOG(FIXME) +#endif + +#define ERR VKD3D_DBG_LOG(ERR) + +#ifndef TRACE_ON +#define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) +#endif + +#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) + +#define VKD3D_DEBUG_ENV_NAME(name) const char *vkd3d_dbg_env_name = name + +static inline const char *debugstr_guid(const GUID *guid) +{ + if (!guid) + return "(null)"; + + return vkd3d_dbg_sprintf("{%08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}", + (unsigned long)guid->Data1, guid->Data2, guid->Data3, guid->Data4[0], + guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4], + guid->Data4[5], guid->Data4[6], guid->Data4[7]); +} + +unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value); + +struct vkd3d_debug_option +{ + const char *name; + uint64_t flag; +}; + +bool vkd3d_debug_list_has_member(const char *string, const char *member); +uint64_t vkd3d_parse_debug_options(const char *string, + const struct vkd3d_debug_option *options, unsigned int option_count); + +#endif /* __VKD3D_DEBUG_H */ diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h new file mode 100644 index 00000000000..8a2edb1000d --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_memory.h @@ -0,0 +1,70 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_MEMORY_H +#define __VKD3D_MEMORY_H + +#include +#include +#include +#include + +#include "vkd3d_debug.h" + +static inline void *vkd3d_malloc(size_t size) +{ + void *ptr; + if (!(ptr = malloc(size))) + ERR("Out of memory.\n"); + return ptr; +} + +static inline void *vkd3d_realloc(void *ptr, size_t size) +{ + if (!(ptr = realloc(ptr, size))) + ERR("Out of memory, size %zu.\n", size); + return ptr; +} + +static inline void *vkd3d_calloc(size_t count, size_t size) +{ + void *ptr; + assert(count <= ~(size_t)0 / size); + if (!(ptr = calloc(count, size))) + ERR("Out of memory.\n"); + return ptr; +} + +static inline void vkd3d_free(void *ptr) +{ + free(ptr); +} + +static inline char *vkd3d_strdup(const char *string) +{ + size_t len = strlen(string) + 1; + char *ptr; + + if ((ptr = vkd3d_malloc(len))) + memcpy(ptr, string, len); + return ptr; +} + +bool vkd3d_array_reserve(void **elements, size_t *capacity, size_t element_count, size_t element_size); + +#endif /* __VKD3D_MEMORY_H */ diff --git a/libs/vkd3d/include/private/vkd3d_utf8.h b/libs/vkd3d/include/private/vkd3d_utf8.h new file mode 100644 index 00000000000..ccb9e17efca --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_utf8.h @@ -0,0 +1,26 @@ +/* + * Copyright 2019 Zhiyi Zhang for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the 
License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_UTF8_H +#define __VKD3D_UTF8_H + +#include "vkd3d_common.h" + +char *vkd3d_strdup_w_utf8(const WCHAR *wstr, size_t wchar_size); + +#endif /* __VKD3D_UTF8_H */ diff --git a/libs/vkd3d/include/private/vkd3d_version.h b/libs/vkd3d/include/private/vkd3d_version.h new file mode 100644 index 00000000000..712ddcf8491 --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_version.h @@ -0,0 +1 @@ +#define VKD3D_VCS_ID " (git d773dc05c687)" diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h new file mode 100644 index 00000000000..e998ee3f69e --- /dev/null +++ b/libs/vkd3d/include/vkd3d.h @@ -0,0 +1,262 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_H +#define __VKD3D_H + +#include + +#ifndef VKD3D_NO_WIN32_TYPES +# include +# include +#endif /* VKD3D_NO_WIN32_TYPES */ + +#ifndef VKD3D_NO_VULKAN_H +# include +#endif /* VKD3D_NO_VULKAN_H */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +enum vkd3d_structure_type +{ + /* 1.0 */ + VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO, + + /* 1.1 */ + VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO, + + /* 1.2 */ + VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO, + VKD3D_STRUCTURE_TYPE_APPLICATION_INFO, + + /* 1.3 */ + VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_STRUCTURE_TYPE), +}; + +enum vkd3d_api_version +{ + VKD3D_API_VERSION_1_0, + VKD3D_API_VERSION_1_1, + VKD3D_API_VERSION_1_2, + VKD3D_API_VERSION_1_3, +}; + +typedef HRESULT (*PFN_vkd3d_signal_event)(HANDLE event); + +typedef void * (*PFN_vkd3d_thread)(void *data); + +typedef void * (*PFN_vkd3d_create_thread)(PFN_vkd3d_thread thread_main, void *data); +typedef HRESULT (*PFN_vkd3d_join_thread)(void *thread); + +struct vkd3d_instance; + +struct vkd3d_instance_create_info +{ + enum vkd3d_structure_type type; + const void *next; + + PFN_vkd3d_signal_event pfn_signal_event; + PFN_vkd3d_create_thread pfn_create_thread; + PFN_vkd3d_join_thread pfn_join_thread; + size_t wchar_size; + + /* If set to NULL, libvkd3d loads libvulkan. */ + PFN_vkGetInstanceProcAddr pfn_vkGetInstanceProcAddr; + + const char * const *instance_extensions; + uint32_t instance_extension_count; +}; + +/* Extends vkd3d_instance_create_info. Available since 1.1. 
*/ +struct vkd3d_optional_instance_extensions_info +{ + enum vkd3d_structure_type type; + const void *next; + + const char * const *extensions; + uint32_t extension_count; +}; + +/* Extends vkd3d_instance_create_info. Available since 1.2. */ +struct vkd3d_application_info +{ + enum vkd3d_structure_type type; + const void *next; + + const char *application_name; + uint32_t application_version; + + const char *engine_name; /* "vkd3d" if NULL */ + uint32_t engine_version; /* vkd3d version if engine_name is NULL */ + + enum vkd3d_api_version api_version; +}; + +/* Extends vkd3d_instance_create_info. Available since 1.3. */ +struct vkd3d_host_time_domain_info +{ + enum vkd3d_structure_type type; + const void *next; + + uint64_t ticks_per_second; +}; + +struct vkd3d_device_create_info +{ + enum vkd3d_structure_type type; + const void *next; + + D3D_FEATURE_LEVEL minimum_feature_level; + + struct vkd3d_instance *instance; + const struct vkd3d_instance_create_info *instance_create_info; + + VkPhysicalDevice vk_physical_device; + + const char * const *device_extensions; + uint32_t device_extension_count; + + IUnknown *parent; + LUID adapter_luid; +}; + +/* Extends vkd3d_device_create_info. Available since 1.2. 
*/ +struct vkd3d_optional_device_extensions_info +{ + enum vkd3d_structure_type type; + const void *next; + + const char * const *extensions; + uint32_t extension_count; +}; + +/* vkd3d_image_resource_create_info flags */ +#define VKD3D_RESOURCE_INITIAL_STATE_TRANSITION 0x00000001 +#define VKD3D_RESOURCE_PRESENT_STATE_TRANSITION 0x00000002 + +struct vkd3d_image_resource_create_info +{ + enum vkd3d_structure_type type; + const void *next; + + VkImage vk_image; + D3D12_RESOURCE_DESC desc; + unsigned int flags; + D3D12_RESOURCE_STATES present_state; +}; + +#ifdef LIBVKD3D_SOURCE +# define VKD3D_API VKD3D_EXPORT +#else +# define VKD3D_API VKD3D_IMPORT +#endif + +#ifndef VKD3D_NO_PROTOTYPES + +VKD3D_API HRESULT vkd3d_create_instance(const struct vkd3d_instance_create_info *create_info, + struct vkd3d_instance **instance); +VKD3D_API ULONG vkd3d_instance_decref(struct vkd3d_instance *instance); +VKD3D_API VkInstance vkd3d_instance_get_vk_instance(struct vkd3d_instance *instance); +VKD3D_API ULONG vkd3d_instance_incref(struct vkd3d_instance *instance); + +VKD3D_API HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, + REFIID iid, void **device); +VKD3D_API IUnknown *vkd3d_get_device_parent(ID3D12Device *device); +VKD3D_API VkDevice vkd3d_get_vk_device(ID3D12Device *device); +VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); +VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device); + +VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); +VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); +VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue); + +VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, + const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource); +VKD3D_API ULONG vkd3d_resource_decref(ID3D12Resource *resource); +VKD3D_API ULONG vkd3d_resource_incref(ID3D12Resource *resource); + 
+VKD3D_API HRESULT vkd3d_serialize_root_signature(const D3D12_ROOT_SIGNATURE_DESC *desc, + D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob); +VKD3D_API HRESULT vkd3d_create_root_signature_deserializer(const void *data, SIZE_T data_size, + REFIID iid, void **deserializer); + +VKD3D_API VkFormat vkd3d_get_vk_format(DXGI_FORMAT format); + +/* 1.1 */ +VKD3D_API DXGI_FORMAT vkd3d_get_dxgi_format(VkFormat format); + +/* 1.2 */ +VKD3D_API HRESULT vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc, + ID3DBlob **blob, ID3DBlob **error_blob); +VKD3D_API HRESULT vkd3d_create_versioned_root_signature_deserializer(const void *data, SIZE_T data_size, + REFIID iid, void **deserializer); + +#endif /* VKD3D_NO_PROTOTYPES */ + +/* + * Function pointer typedefs for vkd3d functions. + */ +typedef HRESULT (*PFN_vkd3d_create_instance)(const struct vkd3d_instance_create_info *create_info, + struct vkd3d_instance **instance); +typedef ULONG (*PFN_vkd3d_instance_decref)(struct vkd3d_instance *instance); +typedef VkInstance (*PFN_vkd3d_instance_get_vk_instance)(struct vkd3d_instance *instance); +typedef ULONG (*PFN_vkd3d_instance_incref)(struct vkd3d_instance *instance); + +typedef HRESULT (*PFN_vkd3d_create_device)(const struct vkd3d_device_create_info *create_info, + REFIID iid, void **device); +typedef IUnknown * (*PFN_vkd3d_get_device_parent)(ID3D12Device *device); +typedef VkDevice (*PFN_vkd3d_get_vk_device)(ID3D12Device *device); +typedef VkPhysicalDevice (*PFN_vkd3d_get_vk_physical_device)(ID3D12Device *device); +typedef struct vkd3d_instance * (*PFN_vkd3d_instance_from_device)(ID3D12Device *device); + +typedef uint32_t (*PFN_vkd3d_get_vk_queue_family_index)(ID3D12CommandQueue *queue); +typedef VkQueue (*PFN_vkd3d_acquire_vk_queue)(ID3D12CommandQueue *queue); +typedef void (*PFN_vkd3d_release_vk_queue)(ID3D12CommandQueue *queue); + +typedef HRESULT (*PFN_vkd3d_create_image_resource)(ID3D12Device *device, + const struct 
vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource); +typedef ULONG (*PFN_vkd3d_resource_decref)(ID3D12Resource *resource); +typedef ULONG (*PFN_vkd3d_resource_incref)(ID3D12Resource *resource); + +typedef HRESULT (*PFN_vkd3d_serialize_root_signature)(const D3D12_ROOT_SIGNATURE_DESC *desc, + D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob); +typedef HRESULT (*PFN_vkd3d_create_root_signature_deserializer)(const void *data, SIZE_T data_size, + REFIID iid, void **deserializer); + +typedef VkFormat (*PFN_vkd3d_get_vk_format)(DXGI_FORMAT format); + +/* 1.1 */ +typedef DXGI_FORMAT (*PFN_vkd3d_get_dxgi_format)(VkFormat format); + +/* 1.2 */ +typedef HRESULT (*PFN_vkd3d_serialize_versioned_root_signature)(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc, + ID3DBlob **blob, ID3DBlob **error_blob); +typedef HRESULT (*PFN_vkd3d_create_versioned_root_signature_deserializer)(const void *data, SIZE_T data_size, + REFIID iid, void **deserializer); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __VKD3D_H */ diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h new file mode 100644 index 00000000000..05d235f9de6 --- /dev/null +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -0,0 +1,1866 @@ +/* + * Copyright 2017-2019 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SHADER_H +#define __VKD3D_SHADER_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * \file vkd3d_shader.h + * + * \since 1.2 + * + * This file contains definitions for the vkd3d-shader library. + * + * The vkd3d-shader library provides multiple utilities related to the + * compilation, transformation, and reflection of GPU shaders. + */ + +/** \since 1.3 */ +enum vkd3d_shader_api_version +{ + VKD3D_SHADER_API_VERSION_1_0, + VKD3D_SHADER_API_VERSION_1_1, + VKD3D_SHADER_API_VERSION_1_2, + VKD3D_SHADER_API_VERSION_1_3, +}; + +/** The type of a chained structure. */ +enum vkd3d_shader_structure_type +{ + /** The structure is a vkd3d_shader_compile_info structure. */ + VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO, + /** The structure is a vkd3d_shader_interface_info structure. */ + VKD3D_SHADER_STRUCTURE_TYPE_INTERFACE_INFO, + /** The structure is a vkd3d_shader_scan_descriptor_info structure. */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO, + /** The structure is a vkd3d_shader_spirv_domain_shader_target_info structure. */ + VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_DOMAIN_SHADER_TARGET_INFO, + /** The structure is a vkd3d_shader_spirv_target_info structure. */ + VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO, + /** The structure is a vkd3d_shader_transform_feedback_info structure. */ + VKD3D_SHADER_STRUCTURE_TYPE_TRANSFORM_FEEDBACK_INFO, + + /** + * The structure is a vkd3d_shader_hlsl_source_info structure. + * \since 1.3 + */ + VKD3D_SHADER_STRUCTURE_TYPE_HLSL_SOURCE_INFO, + /** + * The structure is a vkd3d_shader_preprocess_info structure. 
+ * \since 1.3 + */ + VKD3D_SHADER_STRUCTURE_TYPE_PREPROCESS_INFO, + /** + * The structure is a vkd3d_shader_descriptor_offset_info structure. + * \since 1.3 + */ + VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), +}; + +/** + * Determines how buffer UAVs are stored. + * + * This also affects UAV counters in Vulkan environments. In OpenGL + * environments, atomic counter buffers are always used for UAV counters. + */ +enum vkd3d_shader_compile_option_buffer_uav +{ + /** Use buffer textures for buffer UAVs. This is the default value. */ + VKD3D_SHADER_COMPILE_OPTION_BUFFER_UAV_STORAGE_TEXEL_BUFFER = 0x00000000, + /** Use storage buffers for buffer UAVs. */ + VKD3D_SHADER_COMPILE_OPTION_BUFFER_UAV_STORAGE_BUFFER = 0x00000001, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_BUFFER_UAV), +}; + +enum vkd3d_shader_compile_option_formatting_flags +{ + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_NONE = 0x00000000, + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_COLOUR = 0x00000001, + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT = 0x00000002, + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_OFFSETS = 0x00000004, + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER = 0x00000008, + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_RAW_IDS = 0x00000010, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), +}; + +enum vkd3d_shader_compile_option_name +{ + /** + * If \a value is nonzero, do not include debug information in the + * compiled shader. The default value is zero. + * + * This option is supported by vkd3d_shader_compile(). However, not all + * compilers support generating debug information. + */ + VKD3D_SHADER_COMPILE_OPTION_STRIP_DEBUG = 0x00000001, + /** \a value is a member of enum vkd3d_shader_compile_option_buffer_uav. */ + VKD3D_SHADER_COMPILE_OPTION_BUFFER_UAV = 0x00000002, + /** \a value is a member of enum vkd3d_shader_compile_option_formatting_flags. 
*/ + VKD3D_SHADER_COMPILE_OPTION_FORMATTING = 0x00000003, + /** \a value is a member of enum vkd3d_shader_api_version. \since 1.3 */ + VKD3D_SHADER_COMPILE_OPTION_API_VERSION = 0x00000004, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), +}; + +/** + * Various settings which may affect shader compilation or scanning, passed as + * part of struct vkd3d_shader_compile_info. For more details, see the + * documentation for individual options. + */ +struct vkd3d_shader_compile_option +{ + /** Name of the option. */ + enum vkd3d_shader_compile_option_name name; + /** + * A value associated with the option. The type and interpretation of the + * value depends on the option in question. + */ + unsigned int value; +}; + +/** Describes which shader stages a resource is visible to. */ +enum vkd3d_shader_visibility +{ + /** The resource is visible to all shader stages. */ + VKD3D_SHADER_VISIBILITY_ALL = 0, + /** The resource is visible only to the vertex shader. */ + VKD3D_SHADER_VISIBILITY_VERTEX = 1, + /** The resource is visible only to the hull shader. */ + VKD3D_SHADER_VISIBILITY_HULL = 2, + /** The resource is visible only to the domain shader. */ + VKD3D_SHADER_VISIBILITY_DOMAIN = 3, + /** The resource is visible only to the geometry shader. */ + VKD3D_SHADER_VISIBILITY_GEOMETRY = 4, + /** The resource is visible only to the pixel shader. */ + VKD3D_SHADER_VISIBILITY_PIXEL = 5, + + /** The resource is visible only to the compute shader. */ + VKD3D_SHADER_VISIBILITY_COMPUTE = 1000000000, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_VISIBILITY), +}; + +/** A generic structure containing a GPU shader, in text or byte-code format. */ +struct vkd3d_shader_code +{ + /** + * Pointer to the code. Note that textual formats are not null-terminated. 
+ * Therefore \a size should not include a null terminator, when this + * structure is passed as input to a vkd3d-shader function, and the + * allocated string will not include a null terminator when this structure + * is used as output. + */ + const void *code; + /** Size of \a code, in bytes. */ + size_t size; +}; + +/** The type of a shader resource descriptor. */ +enum vkd3d_shader_descriptor_type +{ + /** + * The descriptor is a shader resource view. In Direct3D assembly, this is + * bound to a t# register. + */ + VKD3D_SHADER_DESCRIPTOR_TYPE_SRV = 0x0, + /** + * The descriptor is an unordered access view. In Direct3D assembly, this is + * bound to a u# register. + */ + VKD3D_SHADER_DESCRIPTOR_TYPE_UAV = 0x1, + /** + * The descriptor is a constant buffer view. In Direct3D assembly, this is + * bound to a cb# register. + */ + VKD3D_SHADER_DESCRIPTOR_TYPE_CBV = 0x2, + /** + * The descriptor is a sampler. In Direct3D assembly, this is bound to an s# + * register. + */ + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER = 0x3, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DESCRIPTOR_TYPE), +}; + +/** + * A common structure describing the bind point of a descriptor or descriptor + * array in the target environment. + */ +struct vkd3d_shader_descriptor_binding +{ + /** + * The set of the descriptor. If the target environment does not support + * descriptor sets, this value must be set to 0. + */ + unsigned int set; + /** The binding index of the descriptor. */ + unsigned int binding; + /** + * The size of this descriptor array. If an offset is specified for this + * binding by the vkd3d_shader_descriptor_offset_info structure, counting + * starts at that offset. 
+ */ + unsigned int count; +}; + +enum vkd3d_shader_binding_flag +{ + VKD3D_SHADER_BINDING_FLAG_BUFFER = 0x00000001, + VKD3D_SHADER_BINDING_FLAG_IMAGE = 0x00000002, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), +}; + +enum vkd3d_shader_parameter_type +{ + VKD3D_SHADER_PARAMETER_TYPE_UNKNOWN, + VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT, + VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_TYPE), +}; + +enum vkd3d_shader_parameter_data_type +{ + VKD3D_SHADER_PARAMETER_DATA_TYPE_UNKNOWN, + VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE), +}; + +enum vkd3d_shader_parameter_name +{ + VKD3D_SHADER_PARAMETER_NAME_UNKNOWN, + VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), +}; + +struct vkd3d_shader_parameter_immediate_constant +{ + union + { + uint32_t u32; + } u; +}; + +struct vkd3d_shader_parameter_specialization_constant +{ + uint32_t id; +}; + +struct vkd3d_shader_parameter +{ + enum vkd3d_shader_parameter_name name; + enum vkd3d_shader_parameter_type type; + enum vkd3d_shader_parameter_data_type data_type; + union + { + struct vkd3d_shader_parameter_immediate_constant immediate_constant; + struct vkd3d_shader_parameter_specialization_constant specialization_constant; + } u; +}; + +/** + * Describes the mapping of a single resource or resource array to its binding + * point in the target environment. + * + * For example, to map a Direct3D SRV with register space 2, register "t3" to + * a Vulkan descriptor in set 4 and with binding 5, set the following members: + * - \a type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV + * - \a register_space = 2 + * - \a register_index = 3 + * - \a binding.set = 4 + * - \a binding.binding = 5 + * - \a binding.count = 1 + * + * This structure is used in struct vkd3d_shader_interface_info. 
+ */ +struct vkd3d_shader_resource_binding +{ + /** The type of this descriptor. */ + enum vkd3d_shader_descriptor_type type; + /** + * Register space of the Direct3D resource. If the source format does not + * support multiple register spaces, this parameter must be set to 0. + */ + unsigned int register_space; + /** Register index of the DXBC resource. */ + unsigned int register_index; + /** Shader stage(s) to which the resource is visible. */ + enum vkd3d_shader_visibility shader_visibility; + /** A combination of zero or more elements of vkd3d_shader_binding_flag. */ + unsigned int flags; + + /** The binding in the target environment. */ + struct vkd3d_shader_descriptor_binding binding; +}; + +#define VKD3D_SHADER_DUMMY_SAMPLER_INDEX ~0u + +/** + * Describes the mapping of a Direct3D resource-sampler pair to a combined + * sampler (i.e. sampled image). + * + * This structure is used in struct vkd3d_shader_interface_info. + */ +struct vkd3d_shader_combined_resource_sampler +{ + /** + * Register space of the Direct3D resource. If the source format does not + * support multiple register spaces, this parameter must be set to 0. + */ + unsigned int resource_space; + /** Register index of the Direct3D resource. */ + unsigned int resource_index; + /** + * Register space of the Direct3D sampler. If the source format does not + * support multiple register spaces, this parameter must be set to 0. + */ + unsigned int sampler_space; + /** Register index of the Direct3D sampler. */ + unsigned int sampler_index; + /** Shader stage(s) to which the resource is visible. */ + enum vkd3d_shader_visibility shader_visibility; + /** A combination of zero or more elements of vkd3d_shader_binding_flag. */ + unsigned int flags; + + /** The binding in the target environment. */ + struct vkd3d_shader_descriptor_binding binding; +}; + +/** + * Describes the mapping of a single Direct3D UAV counter. + * + * This structure is used in struct vkd3d_shader_interface_info. 
+ */ +struct vkd3d_shader_uav_counter_binding +{ + /** + * Register space of the Direct3D UAV descriptor. If the source format does + * not support multiple register spaces, this parameter must be set to 0. + */ + unsigned int register_space; + /** Register index of the Direct3D UAV descriptor. */ + unsigned int register_index; + /** Shader stage(s) to which the UAV counter is visible. */ + enum vkd3d_shader_visibility shader_visibility; + + /** The binding in the target environment. */ + struct vkd3d_shader_descriptor_binding binding; + unsigned int offset; +}; + +/** + * Describes the mapping of a Direct3D constant buffer to a range of push + * constants in the target environment. + * + * This structure is used in struct vkd3d_shader_interface_info. + */ +struct vkd3d_shader_push_constant_buffer +{ + /** + * Register space of the Direct3D resource. If the source format does not + * support multiple register spaces, this parameter must be set to 0. + */ + unsigned int register_space; + /** Register index of the Direct3D resource. */ + unsigned int register_index; + /** Shader stage(s) to which the resource is visible. */ + enum vkd3d_shader_visibility shader_visibility; + + /** Offset, in bytes, of the target push constants. */ + unsigned int offset; + /** Size, in bytes, of the target push constants. */ + unsigned int size; +}; + +/** + * A chained structure describing the interface between a compiled shader and + * the target environment. + * + * For example, when compiling Direct3D shader byte code to SPIR-V, this + * structure contains mappings from Direct3D descriptor registers to SPIR-V + * descriptor bindings. + * + * This structure is optional. If omitted, vkd3d_shader_compile() will use a + * default mapping, in which resources are mapped to sequential bindings in + * register set 0. + * + * This structure extends vkd3d_shader_compile_info. + * + * This structure contains only input parameters. 
+ */ +struct vkd3d_shader_interface_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_INTERFACE_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** Pointer to an array of bindings for shader resource descriptors. */ + const struct vkd3d_shader_resource_binding *bindings; + /** Size, in elements, of \ref bindings. */ + unsigned int binding_count; + + /** Pointer to an array of bindings for push constant buffers. */ + const struct vkd3d_shader_push_constant_buffer *push_constant_buffers; + /** Size, in elements, of \ref push_constant_buffers. */ + unsigned int push_constant_buffer_count; + + /** Pointer to an array of bindings for combined samplers. */ + const struct vkd3d_shader_combined_resource_sampler *combined_samplers; + /** Size, in elements, of \ref combined_samplers. */ + unsigned int combined_sampler_count; + + /** Pointer to an array of bindings for UAV counters. */ + const struct vkd3d_shader_uav_counter_binding *uav_counters; + /** Size, in elements, of \ref uav_counters. */ + unsigned int uav_counter_count; +}; + +struct vkd3d_shader_transform_feedback_element +{ + unsigned int stream_index; + const char *semantic_name; + unsigned int semantic_index; + uint8_t component_index; + uint8_t component_count; + uint8_t output_slot; +}; + +/* Extends vkd3d_shader_interface_info. */ +struct vkd3d_shader_transform_feedback_info +{ + enum vkd3d_shader_structure_type type; + const void *next; + + const struct vkd3d_shader_transform_feedback_element *elements; + unsigned int element_count; + const unsigned int *buffer_strides; + unsigned int buffer_stride_count; +}; + +struct vkd3d_shader_descriptor_offset +{ + unsigned int static_offset; + unsigned int dynamic_offset_index; +}; + +/** + * A chained structure containing descriptor offsets. + * + * This structure is optional. + * + * This structure extends vkd3d_shader_interface_info. 
+ * + * This structure contains only input parameters. + * + * \since 1.3 + */ +struct vkd3d_shader_descriptor_offset_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** + * Byte offset within the push constants of an array of 32-bit + * descriptor array offsets. See the description of 'binding_offsets' + * below. + */ + unsigned int descriptor_table_offset; + /** Size, in elements, of the descriptor table push constant array. */ + unsigned int descriptor_table_count; + + /** + * Pointer to an array of struct vkd3d_shader_descriptor_offset objects. + * The 'static_offset' field contains an offset into the descriptor arrays + * referenced by the 'bindings' array in struct vkd3d_shader_interface_info. + * This allows mapping multiple shader resource arrays to a single binding + * point in the target environment. + * + * 'dynamic_offset_index' in struct vkd3d_shader_descriptor_offset allows + * offsets to be set at runtime. The 32-bit descriptor table push constant + * at this index will be added to 'static_offset' to calculate the final + * binding offset. + * + * If runtime offsets are not required, set all 'dynamic_offset_index' + * values to \c ~0u and 'descriptor_table_count' to zero. 
+ * + * For example, to map Direct3D constant buffer registers 'cb0[0:3]' and + * 'cb1[6:7]' to descriptors 8-12 and 4-5 in the Vulkan descriptor array in + * descriptor set 3 and with binding 2, set the following values in the + * 'bindings' array in struct vkd3d_shader_interface_info: + * + * \code + * type = VKD3D_SHADER_DESCRIPTOR_TYPE_CBV + * register_space = 0 + * register_index = 0 + * binding.set = 3 + * binding.binding = 2 + * binding.count = 4 + * + * type = VKD3D_SHADER_DESCRIPTOR_TYPE_CBV + * register_space = 0 + * register_index = 6 + * binding.set = 3 + * binding.binding = 2 + * binding.count = 2 + * \endcode + * + * and then pass \c {8, \c 4} as static binding offsets here. + * + * This field may be NULL, in which case the corresponding offsets are + * specified to be 0. + */ + const struct vkd3d_shader_descriptor_offset *binding_offsets; + + /** + * Pointer to an array of offsets into the descriptor arrays referenced by + * the 'uav_counters' array in struct vkd3d_shader_interface_info. This + * works the same way as \ref binding_offsets above. + */ + const struct vkd3d_shader_descriptor_offset *uav_counter_offsets; +}; + +/** The format of a shader to be compiled or scanned. */ +enum vkd3d_shader_source_type +{ + /** + * The shader has no type or is to be ignored. This is not a valid value + * for vkd3d_shader_compile() or vkd3d_shader_scan(). + */ + VKD3D_SHADER_SOURCE_NONE, + /** + * A 'Tokenized Program Format' shader embedded in a DXBC container. This is + * the format used for Direct3D shader model 4 and 5 shaders. + */ + VKD3D_SHADER_SOURCE_DXBC_TPF, + /** High-Level Shader Language source code. \since 1.3 */ + VKD3D_SHADER_SOURCE_HLSL, + /** + * Legacy Direct3D byte-code. This is the format used for Direct3D shader + * model 1, 2, and 3 shaders. \since 1.3 + */ + VKD3D_SHADER_SOURCE_D3D_BYTECODE, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), +}; + +/** The output format of a compiled shader. 
*/ +enum vkd3d_shader_target_type +{ + /** + * The shader has no type or is to be ignored. This is not a valid value + * for vkd3d_shader_compile() or vkd3d_shader_scan(). + */ + VKD3D_SHADER_TARGET_NONE, + /** + * A SPIR-V shader in binary form. This is the format used for Vulkan + * shaders. + */ + VKD3D_SHADER_TARGET_SPIRV_BINARY, + VKD3D_SHADER_TARGET_SPIRV_TEXT, + /** + * Direct3D shader assembly. \since 1.3 + */ + VKD3D_SHADER_TARGET_D3D_ASM, + /** + * Legacy Direct3D byte-code. This is the format used for Direct3D shader + * model 1, 2, and 3 shaders. \since 1.3 + */ + VKD3D_SHADER_TARGET_D3D_BYTECODE, + /** + * A 'Tokenized Program Format' shader embedded in a DXBC container. This is + * the format used for Direct3D shader model 4 and 5 shaders. \since 1.3 + */ + VKD3D_SHADER_TARGET_DXBC_TPF, + /** + * An 'OpenGL Shading Language' shader. \since 1.3 + */ + VKD3D_SHADER_TARGET_GLSL, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TARGET_TYPE), +}; + +/** + * Describes the minimum severity of compilation messages returned by + * vkd3d_shader_compile() and similar functions. + */ +enum vkd3d_shader_log_level +{ + /** No messages will be returned. */ + VKD3D_SHADER_LOG_NONE, + /** Only fatal errors which prevent successful compilation will be returned. */ + VKD3D_SHADER_LOG_ERROR, + /** Non-fatal warnings and fatal errors will be returned. */ + VKD3D_SHADER_LOG_WARNING, + /** + * All messages, including general informational messages, will be returned. + */ + VKD3D_SHADER_LOG_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_LOG_LEVEL), +}; + +/** + * A chained structure containing compilation parameters. + */ +struct vkd3d_shader_compile_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO. */ + enum vkd3d_shader_structure_type type; + /** + * Optional pointer to a structure containing further parameters. For a list + * of valid structures, refer to the respective function documentation. 
If + * no further parameters are needed, this field should be set to NULL. + */ + const void *next; + + /** Input source code or byte code. */ + struct vkd3d_shader_code source; + + /** Format of the input code passed in \ref source. */ + enum vkd3d_shader_source_type source_type; + /** Desired output format. */ + enum vkd3d_shader_target_type target_type; + + /** + * Pointer to an array of compilation options. This field is ignored if + * \ref option_count is zero, but must be valid otherwise. + * + * If the same option is specified multiple times, only the last value is + * used. + * + * Options not relevant to or not supported by a particular shader compiler + * or scanner will be ignored. + */ + const struct vkd3d_shader_compile_option *options; + /** Size, in elements, of \ref options. */ + unsigned int option_count; + + /** Minimum severity of messages returned from the shader function. */ + enum vkd3d_shader_log_level log_level; + /** + * Name of the initial source file, which may be used in error messages or + * debug information. This parameter is optional and may be NULL. + */ + const char *source_name; +}; + +enum vkd3d_shader_spirv_environment +{ + VKD3D_SHADER_SPIRV_ENVIRONMENT_NONE, + VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5, + VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0, /* default target */ + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_ENVIRONMENT), +}; + +enum vkd3d_shader_spirv_extension +{ + VKD3D_SHADER_SPIRV_EXTENSION_NONE, + VKD3D_SHADER_SPIRV_EXTENSION_EXT_DEMOTE_TO_HELPER_INVOCATION, + /** \since 1.3 */ + VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING, + /** \since 1.3 */ + VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_EXTENSION), +}; + +/* Extends vkd3d_shader_compile_info. */ +struct vkd3d_shader_spirv_target_info +{ + enum vkd3d_shader_structure_type type; + const void *next; + + const char *entry_point; /* "main" if NULL. 
*/ + + enum vkd3d_shader_spirv_environment environment; + + const enum vkd3d_shader_spirv_extension *extensions; + unsigned int extension_count; + + const struct vkd3d_shader_parameter *parameters; + unsigned int parameter_count; + + bool dual_source_blending; + const unsigned int *output_swizzles; + unsigned int output_swizzle_count; +}; + +enum vkd3d_shader_tessellator_output_primitive +{ + VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT = 0x1, + VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE = 0x2, + VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW = 0x3, + VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW = 0x4, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TESSELLATOR_OUTPUT_PRIMITIVE), +}; + +enum vkd3d_shader_tessellator_partitioning +{ + VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER = 0x1, + VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2 = 0x2, + VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 0x3, + VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 0x4, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TESSELLATOR_PARTITIONING), +}; + +/* Extends vkd3d_shader_spirv_target_info. */ +struct vkd3d_shader_spirv_domain_shader_target_info +{ + enum vkd3d_shader_structure_type type; + const void *next; + + enum vkd3d_shader_tessellator_output_primitive output_primitive; + enum vkd3d_shader_tessellator_partitioning partitioning; +}; + +/** + * A single preprocessor macro, passed as part of struct + * vkd3d_shader_preprocess_info. + */ +struct vkd3d_shader_macro +{ + /** + * Pointer to a null-terminated string containing the name of a macro. This + * macro must not be a parameterized (i.e. function-like) macro. If this + * field is not a valid macro identifier, this macro will be ignored. + */ + const char *name; + /** + * Optional pointer to a null-terminated string containing the expansion of + * the macro. This field may be set to NULL, in which case the macro has an + * empty expansion. 
+ */ + const char *value; +}; + +/** + * Type of a callback function which will be used to open preprocessor includes. + * + * This callback function is passed as part of struct + * vkd3d_shader_preprocess_info. + * + * If this function fails, vkd3d-shader will emit a compilation error, and the + * \a pfn_close_include callback will not be called. + * + * \param filename Unquoted string used as an argument to the \#include + * directive. + * + * \param local Whether the \#include directive is requesting a local (i.e. + * double-quoted) or system (i.e. angle-bracketed) include. + * + * \param parent_data Unprocessed source code of the file in which this + * \#include directive is evaluated. This parameter may be NULL. + * + * \param context The user-defined pointer passed to struct + * vkd3d_shader_preprocess_info. + * + * \param out Output location for the full contents of the included file. The + * code need not be allocated using standard vkd3d functions, but must remain + * valid until the corresponding call to \a pfn_close_include. If this function + * fails, the contents of this parameter are ignored. + * + * \return A member of \ref vkd3d_result. + */ +typedef int (*PFN_vkd3d_shader_open_include)(const char *filename, bool local, + const char *parent_data, void *context, struct vkd3d_shader_code *out); +/** + * Type of a callback function which will be used to close preprocessor + * includes. + * + * This callback function is passed as part of struct + * vkd3d_shader_preprocess_info. + * + * \param code Contents of the included file, which were allocated by the + * \ref pfn_open_include callback. The user must free them. + * + * \param context The user-defined pointer passed to struct + * vkd3d_shader_preprocess_info. + */ +typedef void (*PFN_vkd3d_shader_close_include)(const struct vkd3d_shader_code *code, void *context); + +/** + * A chained structure containing preprocessing parameters. + * + * This structure is optional. 
+ * + * This structure extends vkd3d_shader_compile_info. + * + * This structure contains only input parameters. + * + * \since 1.3 + */ +struct vkd3d_shader_preprocess_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_PREPROCESS_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** + * Pointer to an array of predefined macros. Each macro in this array will + * be expanded as if a corresponding #define statement were prepended to the + * source code. + * + * If the same macro is specified multiple times, only the last value is + * used. + */ + const struct vkd3d_shader_macro *macros; + /** Size, in elements, of \ref macros. */ + unsigned int macro_count; + + /** + * Optional pointer to a callback function, which will be called in order to + * evaluate \#include directives. The function receives parameters + * corresponding to the directive's arguments, and should return the + * complete text of the included file. + * + * If this field is set to NULL, or if this structure is omitted, + * vkd3d-shader will attempt to open included files using POSIX file APIs. + * + * If this field is set to NULL, the \ref pfn_close_include field must also + * be set to NULL. + */ + PFN_vkd3d_shader_open_include pfn_open_include; + /** + * Optional pointer to a callback function, which will be called whenever an + * included file is closed. This function will be called exactly once for + * each successful call to \ref pfn_open_include, and should be used to free + * any resources allocated thereby. + * + * If this field is set to NULL, the \ref pfn_open_include field must also + * be set to NULL. + */ + PFN_vkd3d_shader_close_include pfn_close_include; + /** + * User-defined pointer which will be passed unmodified to the + * \ref pfn_open_include and \ref pfn_close_include callbacks. 
+ */ + void *include_context; +}; + +/** + * A chained structure containing HLSL compilation parameters. + * + * This structure is optional. + * + * This structure extends vkd3d_shader_compile_info. + * + * This structure contains only input parameters. + * + * \since 1.3 + */ +struct vkd3d_shader_hlsl_source_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_HLSL_SOURCE_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** + * Optional pointer to a null-terminated string containing the shader entry + * point. + * + * If this parameter is NULL, vkd3d-shader uses the entry point "main". + */ + const char *entry_point; + struct vkd3d_shader_code secondary_code; + /** + * Pointer to a null-terminated string containing the target shader + * profile. + */ + const char *profile; +}; + +/* root signature 1.0 */ +enum vkd3d_shader_filter +{ + VKD3D_SHADER_FILTER_MIN_MAG_MIP_POINT = 0x000, + VKD3D_SHADER_FILTER_MIN_MAG_POINT_MIP_LINEAR = 0x001, + VKD3D_SHADER_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT = 0x004, + VKD3D_SHADER_FILTER_MIN_POINT_MAG_MIP_LINEAR = 0x005, + VKD3D_SHADER_FILTER_MIN_LINEAR_MAG_MIP_POINT = 0x010, + VKD3D_SHADER_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x011, + VKD3D_SHADER_FILTER_MIN_MAG_LINEAR_MIP_POINT = 0x014, + VKD3D_SHADER_FILTER_MIN_MAG_MIP_LINEAR = 0x015, + VKD3D_SHADER_FILTER_ANISOTROPIC = 0x055, + VKD3D_SHADER_FILTER_COMPARISON_MIN_MAG_MIP_POINT = 0x080, + VKD3D_SHADER_FILTER_COMPARISON_MIN_MAG_POINT_MIP_LINEAR = 0x081, + VKD3D_SHADER_FILTER_COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT = 0x084, + VKD3D_SHADER_FILTER_COMPARISON_MIN_POINT_MAG_MIP_LINEAR = 0x085, + VKD3D_SHADER_FILTER_COMPARISON_MIN_LINEAR_MAG_MIP_POINT = 0x090, + VKD3D_SHADER_FILTER_COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x091, + VKD3D_SHADER_FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT = 0x094, + VKD3D_SHADER_FILTER_COMPARISON_MIN_MAG_MIP_LINEAR = 0x095, + 
VKD3D_SHADER_FILTER_COMPARISON_ANISOTROPIC = 0x0d5, + VKD3D_SHADER_FILTER_MINIMUM_MIN_MAG_MIP_POINT = 0x100, + VKD3D_SHADER_FILTER_MINIMUM_MIN_MAG_POINT_MIP_LINEAR = 0x101, + VKD3D_SHADER_FILTER_MINIMUM_MIN_POINT_MAG_LINEAR_MIP_POINT = 0x104, + VKD3D_SHADER_FILTER_MINIMUM_MIN_POINT_MAG_MIP_LINEAR = 0x105, + VKD3D_SHADER_FILTER_MINIMUM_MIN_LINEAR_MAG_MIP_POINT = 0x110, + VKD3D_SHADER_FILTER_MINIMUM_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x111, + VKD3D_SHADER_FILTER_MINIMUM_MIN_MAG_LINEAR_MIP_POINT = 0x114, + VKD3D_SHADER_FILTER_MINIMUM_MIN_MAG_MIP_LINEAR = 0x115, + VKD3D_SHADER_FILTER_MINIMUM_ANISOTROPIC = 0x155, + VKD3D_SHADER_FILTER_MAXIMUM_MIN_MAG_MIP_POINT = 0x180, + VKD3D_SHADER_FILTER_MAXIMUM_MIN_MAG_POINT_MIP_LINEAR = 0x181, + VKD3D_SHADER_FILTER_MAXIMUM_MIN_POINT_MAG_LINEAR_MIP_POINT = 0x184, + VKD3D_SHADER_FILTER_MAXIMUM_MIN_POINT_MAG_MIP_LINEAR = 0x185, + VKD3D_SHADER_FILTER_MAXIMUM_MIN_LINEAR_MAG_MIP_POINT = 0x190, + VKD3D_SHADER_FILTER_MAXIMUM_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x191, + VKD3D_SHADER_FILTER_MAXIMUM_MIN_MAG_LINEAR_MIP_POINT = 0x194, + VKD3D_SHADER_FILTER_MAXIMUM_MIN_MAG_MIP_LINEAR = 0x195, + VKD3D_SHADER_FILTER_MAXIMUM_ANISOTROPIC = 0x1d5, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_FILTER), +}; + +enum vkd3d_shader_texture_address_mode +{ + VKD3D_SHADER_TEXTURE_ADDRESS_MODE_WRAP = 0x1, + VKD3D_SHADER_TEXTURE_ADDRESS_MODE_MIRROR = 0x2, + VKD3D_SHADER_TEXTURE_ADDRESS_MODE_CLAMP = 0x3, + VKD3D_SHADER_TEXTURE_ADDRESS_MODE_BORDER = 0x4, + VKD3D_SHADER_TEXTURE_ADDRESS_MODE_MIRROR_ONCE = 0x5, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TEXTURE_ADDRESS_MODE), +}; + +enum vkd3d_shader_comparison_func +{ + VKD3D_SHADER_COMPARISON_FUNC_NEVER = 0x1, + VKD3D_SHADER_COMPARISON_FUNC_LESS = 0x2, + VKD3D_SHADER_COMPARISON_FUNC_EQUAL = 0x3, + VKD3D_SHADER_COMPARISON_FUNC_LESS_EQUAL = 0x4, + VKD3D_SHADER_COMPARISON_FUNC_GREATER = 0x5, + VKD3D_SHADER_COMPARISON_FUNC_NOT_EQUAL = 0x6, + VKD3D_SHADER_COMPARISON_FUNC_GREATER_EQUAL = 0x7, + 
VKD3D_SHADER_COMPARISON_FUNC_ALWAYS = 0x8, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPARISON_FUNC), +}; + +enum vkd3d_shader_static_border_colour +{ + VKD3D_SHADER_STATIC_BORDER_COLOUR_TRANSPARENT_BLACK = 0x0, + VKD3D_SHADER_STATIC_BORDER_COLOUR_OPAQUE_BLACK = 0x1, + VKD3D_SHADER_STATIC_BORDER_COLOUR_OPAQUE_WHITE = 0x2, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STATIC_BORDER_COLOUR), +}; + +struct vkd3d_shader_static_sampler_desc +{ + enum vkd3d_shader_filter filter; + enum vkd3d_shader_texture_address_mode address_u; + enum vkd3d_shader_texture_address_mode address_v; + enum vkd3d_shader_texture_address_mode address_w; + float mip_lod_bias; + unsigned int max_anisotropy; + enum vkd3d_shader_comparison_func comparison_func; + enum vkd3d_shader_static_border_colour border_colour; + float min_lod; + float max_lod; + unsigned int shader_register; + unsigned int register_space; + enum vkd3d_shader_visibility shader_visibility; +}; + +struct vkd3d_shader_descriptor_range +{ + enum vkd3d_shader_descriptor_type range_type; + unsigned int descriptor_count; + unsigned int base_shader_register; + unsigned int register_space; + unsigned int descriptor_table_offset; +}; + +struct vkd3d_shader_root_descriptor_table +{ + unsigned int descriptor_range_count; + const struct vkd3d_shader_descriptor_range *descriptor_ranges; +}; + +struct vkd3d_shader_root_constants +{ + unsigned int shader_register; + unsigned int register_space; + unsigned int value_count; +}; + +struct vkd3d_shader_root_descriptor +{ + unsigned int shader_register; + unsigned int register_space; +}; + +enum vkd3d_shader_root_parameter_type +{ + VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE = 0x0, + VKD3D_SHADER_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS = 0x1, + VKD3D_SHADER_ROOT_PARAMETER_TYPE_CBV = 0x2, + VKD3D_SHADER_ROOT_PARAMETER_TYPE_SRV = 0x3, + VKD3D_SHADER_ROOT_PARAMETER_TYPE_UAV = 0x4, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_ROOT_PARAMETER_TYPE), +}; + +struct vkd3d_shader_root_parameter +{ + enum 
vkd3d_shader_root_parameter_type parameter_type; + union + { + struct vkd3d_shader_root_descriptor_table descriptor_table; + struct vkd3d_shader_root_constants constants; + struct vkd3d_shader_root_descriptor descriptor; + } u; + enum vkd3d_shader_visibility shader_visibility; +}; + +enum vkd3d_shader_root_signature_flags +{ + VKD3D_SHADER_ROOT_SIGNATURE_FLAG_NONE = 0x00, + VKD3D_SHADER_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT = 0x01, + VKD3D_SHADER_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS = 0x02, + VKD3D_SHADER_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS = 0x04, + VKD3D_SHADER_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS = 0x08, + VKD3D_SHADER_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS = 0x10, + VKD3D_SHADER_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS = 0x20, + VKD3D_SHADER_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT = 0x40, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_ROOT_SIGNATURE_FLAGS), +}; + +struct vkd3d_shader_root_signature_desc +{ + unsigned int parameter_count; + const struct vkd3d_shader_root_parameter *parameters; + unsigned int static_sampler_count; + const struct vkd3d_shader_static_sampler_desc *static_samplers; + enum vkd3d_shader_root_signature_flags flags; +}; + +/* root signature 1.1 */ +enum vkd3d_shader_root_descriptor_flags +{ + VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_NONE = 0x0, + VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE = 0x2, + VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE = 0x4, + VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_STATIC = 0x8, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_ROOT_DESCRIPTOR_FLAGS), +}; + +enum vkd3d_shader_descriptor_range_flags +{ + VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_NONE = 0x0, + VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE = 0x1, + VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE = 0x2, + VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE = 0x4, + VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DATA_STATIC = 0x8, + + 
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DESCRIPTOR_RANGE_FLAGS), +}; + +struct vkd3d_shader_descriptor_range1 +{ + enum vkd3d_shader_descriptor_type range_type; + unsigned int descriptor_count; + unsigned int base_shader_register; + unsigned int register_space; + enum vkd3d_shader_descriptor_range_flags flags; + unsigned int descriptor_table_offset; +}; + +struct vkd3d_shader_root_descriptor_table1 +{ + unsigned int descriptor_range_count; + const struct vkd3d_shader_descriptor_range1 *descriptor_ranges; +}; + +struct vkd3d_shader_root_descriptor1 +{ + unsigned int shader_register; + unsigned int register_space; + enum vkd3d_shader_root_descriptor_flags flags; +}; + +struct vkd3d_shader_root_parameter1 +{ + enum vkd3d_shader_root_parameter_type parameter_type; + union + { + struct vkd3d_shader_root_descriptor_table1 descriptor_table; + struct vkd3d_shader_root_constants constants; + struct vkd3d_shader_root_descriptor1 descriptor; + } u; + enum vkd3d_shader_visibility shader_visibility; +}; + +struct vkd3d_shader_root_signature_desc1 +{ + unsigned int parameter_count; + const struct vkd3d_shader_root_parameter1 *parameters; + unsigned int static_sampler_count; + const struct vkd3d_shader_static_sampler_desc *static_samplers; + enum vkd3d_shader_root_signature_flags flags; +}; + +enum vkd3d_shader_root_signature_version +{ + VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0 = 0x1, + VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1 = 0x2, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_ROOT_SIGNATURE_VERSION), +}; + +struct vkd3d_shader_versioned_root_signature_desc +{ + enum vkd3d_shader_root_signature_version version; + union + { + struct vkd3d_shader_root_signature_desc v_1_0; + struct vkd3d_shader_root_signature_desc1 v_1_1; + } u; +}; + +/** + * The type of a shader resource, returned as part of struct + * vkd3d_shader_descriptor_info. + */ +enum vkd3d_shader_resource_type +{ + /** + * The type is invalid or not applicable for this descriptor. This value is + * returned for samplers. 
+ */ + VKD3D_SHADER_RESOURCE_NONE = 0x0, + /** Dimensionless buffer. */ + VKD3D_SHADER_RESOURCE_BUFFER = 0x1, + /** 1-dimensional texture. */ + VKD3D_SHADER_RESOURCE_TEXTURE_1D = 0x2, + /** 2-dimensional texture. */ + VKD3D_SHADER_RESOURCE_TEXTURE_2D = 0x3, + /** Multisampled 2-dimensional texture. */ + VKD3D_SHADER_RESOURCE_TEXTURE_2DMS = 0x4, + /** 3-dimensional texture. */ + VKD3D_SHADER_RESOURCE_TEXTURE_3D = 0x5, + /** Cubemap texture. */ + VKD3D_SHADER_RESOURCE_TEXTURE_CUBE = 0x6, + /** 1-dimensional array texture. */ + VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY = 0x7, + /** 2-dimensional array texture. */ + VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY = 0x8, + /** Multisampled 2-dimensional array texture. */ + VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY = 0x9, + /** Cubemap array texture. */ + VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY = 0xa, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_RESOURCE_TYPE), +}; + +/** + * The type of the data contained in a shader resource, returned as part of + * struct vkd3d_shader_descriptor_info. All formats are 32-bit. + */ +enum vkd3d_shader_resource_data_type +{ + /** Unsigned normalized integer. */ + VKD3D_SHADER_RESOURCE_DATA_UNORM = 0x1, + /** Signed normalized integer. */ + VKD3D_SHADER_RESOURCE_DATA_SNORM = 0x2, + /** Signed integer. */ + VKD3D_SHADER_RESOURCE_DATA_INT = 0x3, + /** Unsigned integer. */ + VKD3D_SHADER_RESOURCE_DATA_UINT = 0x4, + /** IEEE single-precision floating-point. */ + VKD3D_SHADER_RESOURCE_DATA_FLOAT = 0x5, + /** Undefined/type-less. \since 1.3 */ + VKD3D_SHADER_RESOURCE_DATA_MIXED = 0x6, + /** IEEE double-precision floating-point. \since 1.3 */ + VKD3D_SHADER_RESOURCE_DATA_DOUBLE = 0x7, + /** Continuation of the previous component. For example, 64-bit + * double-precision floating-point data may be returned as two 32-bit + * components, with the first component (containing the LSB) specified as + * VKD3D_SHADER_RESOURCE_DATA_DOUBLE, and the second component specified + * as VKD3D_SHADER_RESOURCE_DATA_CONTINUED. 
\since 1.3 */ + VKD3D_SHADER_RESOURCE_DATA_CONTINUED = 0x8, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_RESOURCE_DATA_TYPE), +}; + +/** + * Additional flags describing a shader descriptor, returned as part of struct + * vkd3d_shader_descriptor_info. + */ +enum vkd3d_shader_descriptor_info_flag +{ + /** + * The descriptor is a UAV resource, whose counter is read from or written + * to by the shader. + */ + VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER = 0x00000001, + /** The descriptor is a UAV resource, which is read from by the shader. */ + VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ = 0x00000002, + /** The descriptor is a comparison sampler. */ + VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE = 0x00000004, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DESCRIPTOR_INFO_FLAG), +}; + +/** + * Describes a single shader descriptor; returned as part of + * struct vkd3d_shader_scan_descriptor_info. + */ +struct vkd3d_shader_descriptor_info +{ + /** Type of the descriptor (for example, SRV, CBV, UAV, or sampler). */ + enum vkd3d_shader_descriptor_type type; + /** + * Register space of the resource, or 0 if the shader does not + * support multiple register spaces. + */ + unsigned int register_space; + /** Register index of the descriptor. */ + unsigned int register_index; + /** Resource type, if applicable, including its dimension. */ + enum vkd3d_shader_resource_type resource_type; + /** Data type contained in the resource (for example, float or integer). */ + enum vkd3d_shader_resource_data_type resource_data_type; + /** + * Bitwise combination of zero or more members of + * \ref vkd3d_shader_descriptor_info_flag. + */ + unsigned int flags; + /** + * Size of this descriptor array, or 1 if a single descriptor. + * For an unbounded array this value is ~0u. + */ + unsigned int count; +}; + +/** + * A chained structure enumerating the descriptors declared by a shader. + * + * This structure extends vkd3d_shader_compile_info. 
+ */ +struct vkd3d_shader_scan_descriptor_info +{ + /** + * Input; must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO. + */ + enum vkd3d_shader_structure_type type; + /** Input; optional pointer to a structure containing further parameters. */ + const void *next; + + /** Output; returns a pointer to an array of descriptors. */ + struct vkd3d_shader_descriptor_info *descriptors; + /** Output; size, in elements, of \ref descriptors. */ + unsigned int descriptor_count; +}; + +/** + * Data type of a shader varying, returned as part of struct + * vkd3d_shader_signature_element. + */ +enum vkd3d_shader_component_type +{ + /** The varying has no type. */ + VKD3D_SHADER_COMPONENT_VOID = 0x0, + /** 32-bit unsigned integer. */ + VKD3D_SHADER_COMPONENT_UINT = 0x1, + /** 32-bit signed integer. */ + VKD3D_SHADER_COMPONENT_INT = 0x2, + /** 32-bit IEEE floating-point. */ + VKD3D_SHADER_COMPONENT_FLOAT = 0x3, + /** Boolean. */ + VKD3D_SHADER_COMPONENT_BOOL = 0x4, + /** 64-bit IEEE floating-point. */ + VKD3D_SHADER_COMPONENT_DOUBLE = 0x5, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPONENT_TYPE), +}; + +/** System value semantic, returned as part of struct vkd3d_shader_signature. */ +enum vkd3d_shader_sysval_semantic +{ + /** No system value. */ + VKD3D_SHADER_SV_NONE = 0x00, + /** Vertex position; SV_Position in Direct3D. */ + VKD3D_SHADER_SV_POSITION = 0x01, + /** Clip distance; SV_ClipDistance in Direct3D. */ + VKD3D_SHADER_SV_CLIP_DISTANCE = 0x02, + /** Cull distance; SV_CullDistance in Direct3D. */ + VKD3D_SHADER_SV_CULL_DISTANCE = 0x03, + /** Render target layer; SV_RenderTargetArrayIndex in Direct3D. */ + VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX = 0x04, + /** Viewport index; SV_ViewportArrayIndex in Direct3D. */ + VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX = 0x05, + /** Vertex ID; SV_VertexID in Direct3D. */ + VKD3D_SHADER_SV_VERTEX_ID = 0x06, + /** Primtive ID; SV_PrimitiveID in Direct3D. 
*/ + VKD3D_SHADER_SV_PRIMITIVE_ID = 0x07, + /** Instance ID; SV_InstanceID in Direct3D. */ + VKD3D_SHADER_SV_INSTANCE_ID = 0x08, + /** Whether the triangle is front-facing; SV_IsFrontFace in Direct3D. */ + VKD3D_SHADER_SV_IS_FRONT_FACE = 0x09, + /** Sample index; SV_SampleIndex in Direct3D. */ + VKD3D_SHADER_SV_SAMPLE_INDEX = 0x0a, + VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE = 0x0b, + VKD3D_SHADER_SV_TESS_FACTOR_QUADINT = 0x0c, + VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE = 0x0d, + VKD3D_SHADER_SV_TESS_FACTOR_TRIINT = 0x0e, + VKD3D_SHADER_SV_TESS_FACTOR_LINEDET = 0x0f, + VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN = 0x10, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SYSVAL_SEMANTIC), +}; + +/** + * Minimum interpolation precision of a shader varying, returned as part of + * struct vkd3d_shader_signature_element. + */ +enum vkd3d_shader_minimum_precision +{ + VKD3D_SHADER_MINIMUM_PRECISION_NONE = 0, + /** 16-bit floating-point. */ + VKD3D_SHADER_MINIMUM_PRECISION_FLOAT_16 = 1, + /** 10-bit fixed point (2 integer and 8 fractional bits). */ + VKD3D_SHADER_MINIMUM_PRECISION_FIXED_8_2 = 2, + /** 16-bit signed integer. */ + VKD3D_SHADER_MINIMUM_PRECISION_INT_16 = 4, + /** 16-bit unsigned integer. */ + VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 = 5, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_MINIMUM_PRECISION), +}; + +/** + * A single shader varying, returned as part of struct vkd3d_shader_signature. + */ +struct vkd3d_shader_signature_element +{ + /** Semantic name. */ + const char *semantic_name; + /** Semantic index, or 0 if the semantic is not indexed. */ + unsigned int semantic_index; + /** + * Stream index of a geometry shader output semantic. If the signature is + * not a geometry shader output signature, this field will be set to 0. + */ + unsigned int stream_index; + /** + * System value semantic. If the varying is not a system value, this field + * will be set to VKD3D_SHADER_SV_NONE. + */ + enum vkd3d_shader_sysval_semantic sysval_semantic; + /** Data type. 
*/ + enum vkd3d_shader_component_type component_type; + /** Register index. */ + unsigned int register_index; + /** Mask of the register components allocated to this varying. */ + unsigned int mask; + /** + * Subset of \ref mask which the shader reads from or writes to. Unlike + * Direct3D shader bytecode, the mask for output and tessellation signatures + * is not inverted, i.e. bits set in this field denote components which are + * written to. + */ + unsigned int used_mask; + /** Minimum interpolation precision. */ + enum vkd3d_shader_minimum_precision min_precision; +}; + +/** + * Description of a shader input or output signature. This structure is + * populated by vkd3d_shader_parse_input_signature(). + * + * The helper function vkd3d_shader_find_signature_element() will look up a + * varying element by its semantic name, semantic index, and stream index. + */ +struct vkd3d_shader_signature +{ + /** Pointer to an array of varyings. */ + struct vkd3d_shader_signature_element *elements; + /** Size, in elements, of \ref elements. */ + unsigned int element_count; +}; + +/** Possible values for a single component of a vkd3d-shader swizzle. */ +enum vkd3d_shader_swizzle_component +{ + VKD3D_SHADER_SWIZZLE_X = 0x0, + VKD3D_SHADER_SWIZZLE_Y = 0x1, + VKD3D_SHADER_SWIZZLE_Z = 0x2, + VKD3D_SHADER_SWIZZLE_W = 0x3, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SWIZZLE_COMPONENT), +}; + +/** + * A mask selecting one component from a vkd3d-shader swizzle. The component has + * type \ref vkd3d_shader_swizzle_component. + */ +#define VKD3D_SHADER_SWIZZLE_MASK (0xffu) +/** The offset, in bits, of the nth parameter of a vkd3d-shader swizzle. */ +#define VKD3D_SHADER_SWIZZLE_SHIFT(idx) (8u * (idx)) + +/** + * A helper macro which returns a vkd3d-shader swizzle with the given + * components. The components are specified as the suffixes to members of + * \ref vkd3d_shader_swizzle_component. 
For example, the swizzle ".xwyy" can be + * represented as: + * \code + * VKD3D_SHADER_SWIZZLE(X, W, Y, Y) + * \endcode + */ +#define VKD3D_SHADER_SWIZZLE(x, y, z, w) \ + vkd3d_shader_create_swizzle(VKD3D_SHADER_SWIZZLE_ ## x, \ + VKD3D_SHADER_SWIZZLE_ ## y, \ + VKD3D_SHADER_SWIZZLE_ ## z, \ + VKD3D_SHADER_SWIZZLE_ ## w) + +/** The identity swizzle ".xyzw". */ +#define VKD3D_SHADER_NO_SWIZZLE VKD3D_SHADER_SWIZZLE(X, Y, Z, W) + +/** Build a vkd3d-shader swizzle with the given components. */ +static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_component x, + enum vkd3d_shader_swizzle_component y, enum vkd3d_shader_swizzle_component z, + enum vkd3d_shader_swizzle_component w) +{ + return ((x & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(0)) + | ((y & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(1)) + | ((z & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(2)) + | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); +} + +#ifdef LIBVKD3D_SHADER_SOURCE +# define VKD3D_SHADER_API VKD3D_EXPORT +#else +# define VKD3D_SHADER_API VKD3D_IMPORT +#endif + +#ifndef VKD3D_SHADER_NO_PROTOTYPES + +/** + * Returns the current version of this library. + * + * \param major Output location for the major version of this library. + * + * \param minor Output location for the minor version of this library. + * + * \return A human-readable string describing the library name and version. This + * string is null-terminated and UTF-8 encoded. This may be a pointer to static + * data in libvkd3d-shader; it should not be freed. + */ +VKD3D_SHADER_API const char *vkd3d_shader_get_version(unsigned int *major, unsigned int *minor); +/** + * Returns the source types supported, with any target type, by + * vkd3d_shader_compile(). Future versions of the library may introduce + * additional source types; callers should ignore unrecognised source types. 
+ * + * Use vkd3d_shader_get_supported_target_types() to determine which target types + * are supported for each source type. + * + * \param count Output location for the size, in elements, of the returned + * array. + * + * \return Pointer to an array of source types supported by this version of + * vkd3d-shader. This array may be a pointer to static data in libvkd3d-shader; + * it should not be freed. + */ +VKD3D_SHADER_API const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(unsigned int *count); +/** + * Returns the target types supported, with the given source type, by + * vkd3d_shader_compile(). Future versions of the library may introduce + * additional target types; callers should ignore unrecognised target types. + * + * \param source_type Source type for which to enumerate supported target types. + * + * \param count Output location for the size, in elements, of the returned + * array. + * + * \return Pointer to an array of target types supported by this version of + * vkd3d-shader. This array may be a pointer to static data in libvkd3d-shader; + * it should not be freed. + */ +VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + enum vkd3d_shader_source_type source_type, unsigned int *count); + +/** + * Transform a form of GPU shader source code or byte code into another form of + * source code or byte code. + * + * This version of vkd3d-shader supports the following transformations: + * - VKD3D_SHADER_SOURCE_DXBC_TPF to VKD3D_SHADER_TARGET_SPIRV_BINARY + * + * Supported transformations can also be detected at runtime with the functions + * vkd3d_shader_get_supported_source_types() and + * vkd3d_shader_get_supported_target_types(). 
+ * + * Depending on the source and target types, this function may support the + * following chained structures: + * - vkd3d_shader_interface_info + * - vkd3d_shader_spirv_domain_shader_target_info + * - vkd3d_shader_spirv_target_info + * - vkd3d_shader_transform_feedback_info + * + * \param compile_info A chained structure containing compilation parameters. + * + * \param out A pointer to a vkd3d_shader_code structure in which the compiled + * code will be stored. + * \n + * The compiled shader is allocated by vkd3d-shader and should be freed with + * vkd3d_shader_free_shader_code() when no longer needed. + * + * \param messages Optional output location for error or informational messages + * produced by the compiler. + * \n + * This string is null-terminated and UTF-8 encoded. + * \n + * The messages are allocated by vkd3d-shader and should be freed with + * vkd3d_shader_free_messages() when no longer needed. + * \n + * The messages returned can be regulated with the \a log_level member of struct + * vkd3d_shader_compile_info. Regardless of the requested level, if this + * parameter is NULL, no compilation messages will be returned. + * \n + * If no compilation messages are produced by the compiler, this parameter may + * receive NULL instead of a valid string pointer. + * + * \return A member of \ref vkd3d_result. + */ +VKD3D_SHADER_API int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages); +/** + * Free shader messages allocated by another vkd3d-shader function, such as + * vkd3d_shader_compile(). + * + * \param messages Messages to free. This pointer is optional and may be NULL, + * in which case no action will be taken. + */ +VKD3D_SHADER_API void vkd3d_shader_free_messages(char *messages); +/** + * Free shader code allocated by another vkd3d-shader function, such as + * vkd3d_shader_compile(). 
+ * + * This function frees the \ref vkd3d_shader_code.code member, but does not free + * the structure itself. + * + * \param code Code to free. + */ +VKD3D_SHADER_API void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *code); + +/** + * Convert a byte code description of a shader root signature to a structural + * description which can be easily parsed by C code. + * + * This function corresponds to + * ID3D12VersionedRootSignatureDeserializer::GetUnconvertedRootSignatureDesc(). + * + * This function performs the reverse transformation of + * vkd3d_shader_serialize_root_signature(). + * + * This function parses a standalone root signature, and should not be confused + * with vkd3d_shader_parse_input_signature(). + * + * \param dxbc Compiled byte code, in DXBC format. + * + * \param root_signature Output location in which the decompiled root signature + * will be stored. + * \n + * Members of \a root_signature may be allocated by vkd3d-shader. The signature + * should be freed with vkd3d_shader_free_root_signature() when no longer + * needed. + * + * \param messages Optional output location for error or informational messages + * produced by the compiler. + * \n + * This parameter behaves identically to the \a messages parameter of + * vkd3d_shader_compile(). + * + * \return A member of \ref vkd3d_result. + */ +VKD3D_SHADER_API int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_versioned_root_signature_desc *root_signature, char **messages); +/** + * Free a structural representation of a shader root signature allocated by + * vkd3d_shader_convert_root_signature() or vkd3d_shader_parse_root_signature(). + * + * This function may free members of struct + * vkd3d_shader_versioned_root_signature_desc, but does not free the structure + * itself. + * + * \param root_signature Signature description to free. 
+ */ +VKD3D_SHADER_API void vkd3d_shader_free_root_signature( + struct vkd3d_shader_versioned_root_signature_desc *root_signature); + +/** + * Convert a structural description of a shader root signature to a byte code + * format capable of being read by ID3D12Device::CreateRootSignature. The + * compiled signature is compatible with Microsoft D3D 12. + * + * This function corresponds to D3D12SerializeVersionedRootSignature(). + * + * \param root_signature Description of the root signature. + * + * \param dxbc A pointer to a vkd3d_shader_code structure in which the compiled + * code will be stored. + * \n + * The compiled signature is allocated by vkd3d-shader and should be freed with + * vkd3d_shader_free_shader_code() when no longer needed. + * + * \param messages Optional output location for error or informational messages + * produced by the compiler. + * \n + * This parameter behaves identically to the \a messages parameter of + * vkd3d_shader_compile(). + * + * \return A member of \ref vkd3d_result. + */ +VKD3D_SHADER_API int vkd3d_shader_serialize_root_signature( + const struct vkd3d_shader_versioned_root_signature_desc *root_signature, + struct vkd3d_shader_code *dxbc, char **messages); +/** + * Convert a structural representation of a root signature to a different + * version of structural representation. + * + * This function corresponds to + * ID3D12VersionedRootSignatureDeserializer::GetRootSignatureDescAtVersion(). + * + * \param dst A pointer to a vkd3d_shader_versioned_root_signature_desc + * structure in which the converted signature will be stored. + * \n + * Members of \a dst may be allocated by vkd3d-shader. The signature should be + * freed with vkd3d_shader_free_root_signature() when no longer needed. + * + * \param version The desired version to convert \a src to. This version must + * not be equal to \a src->version. + * + * \param src Input root signature description. + * + * \return A member of \ref vkd3d_result. 
+ */ +VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_versioned_root_signature_desc *dst, + enum vkd3d_shader_root_signature_version version, const struct vkd3d_shader_versioned_root_signature_desc *src); + +/** + * Parse shader source code or byte code, returning various types of requested + * information. + * + * Currently this function supports the following code types: + * - VKD3D_SHADER_SOURCE_DXBC_TPF + * + * \param compile_info A chained structure containing scan parameters. + * \n + * The DXBC_TPF scanner supports the following chained structures: + * - vkd3d_shader_scan_descriptor_info + * \n + * Although the \a compile_info parameter is read-only, chained structures + * passed to this function need not be, and may serve as output parameters, + * depending on their structure type. + * + * \param messages Optional output location for error or informational messages + * produced by the compiler. + * \n + * This parameter behaves identically to the \a messages parameter of + * vkd3d_shader_compile(). + * + * \return A member of \ref vkd3d_result. + */ +VKD3D_SHADER_API int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages); +/** + * Free members of struct vkd3d_shader_scan_descriptor_info() allocated by + * vkd3d_shader_scan(). + * + * This function may free members of vkd3d_shader_scan_descriptor_info, but + * does not free the structure itself. + * + * \param scan_descriptor_info Descriptor information to free. + */ +VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); + +/** + * Read the input signature of a compiled shader, returning a structural + * description which can be easily parsed by C code. + * + * This function parses a compiled shader. To parse a standalone root signature, + * use vkd3d_shader_parse_root_signature(). + * + * \param dxbc Compiled byte code, in DXBC format. 
+ * + * \param signature Output location in which the parsed root signature will be + * stored. + * \n + * Members of \a signature may be allocated by vkd3d-shader. The signature + * should be freed with vkd3d_shader_free_shader_signature() when no longer + * needed. + * + * \param messages Optional output location for error or informational messages + * produced by the compiler. + * \n + * This parameter behaves identically to the \a messages parameter of + * vkd3d_shader_compile(). + * + * \return A member of \ref vkd3d_result. + */ +VKD3D_SHADER_API int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_signature *signature, char **messages); +/** + * Find a single element of a parsed input signature. + * + * \param signature The parsed input signature. This structure is normally + * populated by vkd3d_shader_parse_input_signature(). + * + * \param semantic_name Semantic name of the desired element. This function + * performs a case-insensitive comparison with respect to the ASCII plane. + * + * \param semantic_index Semantic index of the desired element. + * + * \param stream_index Geometry shader stream index of the desired element. If + * the signature is not a geometry shader output signature, this parameter must + * be set to 0. + * + * \return A description of the element matching the requested parameters, or + * NULL if no such element was found. If not NULL, the return value points into + * the \a signature parameter and should not be explicitly freed. + */ +VKD3D_SHADER_API struct vkd3d_shader_signature_element *vkd3d_shader_find_signature_element( + const struct vkd3d_shader_signature *signature, const char *semantic_name, + unsigned int semantic_index, unsigned int stream_index); +/** + * Free a structural representation of a shader input signature allocated by + * vkd3d_shader_parse_input_signature(). 
+ * + * This function may free members of struct vkd3d_shader_signature, but does not + * free the structure itself. + * + * \param signature Signature description to free. + */ +VKD3D_SHADER_API void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature); + +/* 1.3 */ + +/** + * Preprocess the given source code. + * + * This function supports the following chained structures: + * - vkd3d_shader_preprocess_info + * + * \param compile_info A chained structure containing compilation parameters. + * + * \param out A pointer to a vkd3d_shader_code structure in which the + * preprocessed code will be stored. + * \n + * The preprocessed shader is allocated by vkd3d-shader and should be freed with + * vkd3d_shader_free_shader_code() when no longer needed. + * + * \param messages Optional output location for error or informational messages + * produced by the compiler. + * \n + * This parameter behaves identically to the \a messages parameter of + * vkd3d_shader_compile(). + * + * \return A member of \ref vkd3d_result. + * + * \since 1.3 + */ +VKD3D_SHADER_API int vkd3d_shader_preprocess(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages); + +#endif /* VKD3D_SHADER_NO_PROTOTYPES */ + +/** Type of vkd3d_shader_get_version(). */ +typedef const char *(*PFN_vkd3d_shader_get_version)(unsigned int *major, unsigned int *minor); +/** Type of vkd3d_shader_get_supported_source_types(). */ +typedef const enum vkd3d_shader_source_type *(*PFN_vkd3d_shader_get_supported_source_types)(unsigned int *count); +/** Type of vkd3d_shader_get_supported_target_types(). */ +typedef const enum vkd3d_shader_target_type *(*PFN_vkd3d_shader_get_supported_target_types)( + enum vkd3d_shader_source_type source_type, unsigned int *count); + +/** Type of vkd3d_shader_compile(). 
*/ +typedef int (*PFN_vkd3d_shader_compile)(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages); +/** Type of vkd3d_shader_free_messages(). */ +typedef void (*PFN_vkd3d_shader_free_messages)(char *messages); +/** Type of vkd3d_shader_free_shader_code(). */ +typedef void (*PFN_vkd3d_shader_free_shader_code)(struct vkd3d_shader_code *code); + +/** Type of vkd3d_shader_parse_root_signature(). */ +typedef int (*PFN_vkd3d_shader_parse_root_signature)(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_versioned_root_signature_desc *root_signature, char **messages); +/** Type of vkd3d_shader_free_root_signature(). */ +typedef void (*PFN_vkd3d_shader_free_root_signature)(struct vkd3d_shader_versioned_root_signature_desc *root_signature); + +/** Type of vkd3d_shader_serialize_root_signature(). */ +typedef int (*PFN_vkd3d_shader_serialize_root_signature)( + const struct vkd3d_shader_versioned_root_signature_desc *root_signature, + struct vkd3d_shader_code *dxbc, char **messages); + +/** Type of vkd3d_shader_convert_root_signature(). */ +typedef int (*PFN_vkd3d_shader_convert_root_signature)(struct vkd3d_shader_versioned_root_signature_desc *dst, + enum vkd3d_shader_root_signature_version version, const struct vkd3d_shader_versioned_root_signature_desc *src); + +/** Type of vkd3d_shader_scan(). */ +typedef int (*PFN_vkd3d_shader_scan)(const struct vkd3d_shader_compile_info *compile_info, char **messages); +/** Type of vkd3d_shader_free_scan_descriptor_info(). */ +typedef void (*PFN_vkd3d_shader_free_scan_descriptor_info)( + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); + +/** Type of vkd3d_shader_parse_input_signature(). */ +typedef int (*PFN_vkd3d_shader_parse_input_signature)(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_signature *signature, char **messages); +/** Type of vkd3d_shader_find_signature_element(). 
*/ +typedef struct vkd3d_shader_signature_element * (*PFN_vkd3d_shader_find_signature_element)( + const struct vkd3d_shader_signature *signature, const char *semantic_name, + unsigned int semantic_index, unsigned int stream_index); +/** Type of vkd3d_shader_free_shader_signature(). */ +typedef void (*PFN_vkd3d_shader_free_shader_signature)(struct vkd3d_shader_signature *signature); + +/** Type of vkd3d_shader_preprocess(). \since 1.3 */ +typedef void (*PFN_vkd3d_shader_preprocess)(struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __VKD3D_SHADER_H */ diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h new file mode 100644 index 00000000000..0e4ff7b774a --- /dev/null +++ b/libs/vkd3d/include/vkd3d_types.h @@ -0,0 +1,71 @@ +/* + * Copyright 2016-2018 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_TYPES_H +#define __VKD3D_TYPES_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * \file vkd3d_types.h + * + * This file contains definitions for basic types used by vkd3d libraries. 
+ */ + +#define VKD3D_FORCE_32_BIT_ENUM(name) name##_FORCE_32BIT = 0x7fffffff + +/** + * Result codes returned by some vkd3d functions. Error codes always have + * negative values; non-error codes never do. + */ +enum vkd3d_result +{ + /** Success. */ + VKD3D_OK = 0, + /** An unspecified failure occurred. */ + VKD3D_ERROR = -1, + /** There are not enough resources available to complete the operation. */ + VKD3D_ERROR_OUT_OF_MEMORY = -2, + /** One or more parameters passed to a vkd3d function were invalid. */ + VKD3D_ERROR_INVALID_ARGUMENT = -3, + /** A shader passed to a vkd3d function was invalid. */ + VKD3D_ERROR_INVALID_SHADER = -4, + /** The operation is not implemented in this version of vkd3d. */ + VKD3D_ERROR_NOT_IMPLEMENTED = -5, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_RESULT), +}; + +#ifdef _WIN32 +# define VKD3D_IMPORT +# define VKD3D_EXPORT +#elif defined(__GNUC__) +# define VKD3D_IMPORT +# define VKD3D_EXPORT __attribute__((visibility("default"))) +#else +# define VKD3D_IMPORT +# define VKD3D_EXPORT +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __VKD3D_TYPES_H */ diff --git a/libs/vkd3d/include/vulkan/GLSL.std.450.h b/libs/vkd3d/include/vulkan/GLSL.std.450.h new file mode 100644 index 00000000000..943fd8650f8 --- /dev/null +++ b/libs/vkd3d/include/vulkan/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 3; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, 
+ GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/libs/vkd3d/include/vulkan/spirv.h b/libs/vkd3d/include/vulkan/spirv.h new file mode 100644 index 00000000000..bc20ad7fea9 --- /dev/null +++ b/libs/vkd3d/include/vulkan/spirv.h @@ -0,0 +1,2510 @@ +/* +** Copyright (c) 2014-2020 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python, C#, D +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** - C# will use enum classes in the Specification class located in the "Spv" namespace, +** e.g.: Spv.Specification.SourceLanguage.GLSL +** - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. 
+*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010600; +static const unsigned int SpvRevision = 1; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, + SpvSourceLanguageHLSL = 5, + SpvSourceLanguageCPP_for_OpenCL = 6, + SpvSourceLanguageMax = 0x7fffffff, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, + SpvExecutionModelTaskNV = 5267, + SpvExecutionModelMeshNV = 5268, + SpvExecutionModelRayGenerationKHR = 5313, + SpvExecutionModelRayGenerationNV = 5313, + SpvExecutionModelIntersectionKHR = 5314, + SpvExecutionModelIntersectionNV = 5314, + SpvExecutionModelAnyHitKHR = 5315, + SpvExecutionModelAnyHitNV = 5315, + SpvExecutionModelClosestHitKHR = 5316, + SpvExecutionModelClosestHitNV = 5316, + SpvExecutionModelMissKHR = 5317, + SpvExecutionModelMissNV = 5317, + SpvExecutionModelCallableKHR = 5318, + SpvExecutionModelCallableNV = 5318, + SpvExecutionModelMax = 0x7fffffff, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, + SpvAddressingModelPhysicalStorageBuffer64 = 5348, + SpvAddressingModelPhysicalStorageBuffer64EXT = 5348, + SpvAddressingModelMax = 0x7fffffff, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + 
SpvMemoryModelOpenCL = 2, + SpvMemoryModelVulkan = 3, + SpvMemoryModelVulkanKHR = 3, + SpvMemoryModelMax = 0x7fffffff, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, + SpvExecutionModeInitializer = 33, + SpvExecutionModeFinalizer = 34, + SpvExecutionModeSubgroupSize = 35, + SpvExecutionModeSubgroupsPerWorkgroup = 36, + SpvExecutionModeSubgroupsPerWorkgroupId = 37, + SpvExecutionModeLocalSizeId = 38, + SpvExecutionModeLocalSizeHintId = 39, + SpvExecutionModeSubgroupUniformControlFlowKHR = 4421, + SpvExecutionModePostDepthCoverage = 4446, + SpvExecutionModeDenormPreserve = 4459, + SpvExecutionModeDenormFlushToZero = 4460, + SpvExecutionModeSignedZeroInfNanPreserve = 4461, + SpvExecutionModeRoundingModeRTE = 4462, + SpvExecutionModeRoundingModeRTZ = 4463, + SpvExecutionModeStencilRefReplacingEXT = 5027, + 
SpvExecutionModeOutputLinesNV = 5269, + SpvExecutionModeOutputPrimitivesNV = 5270, + SpvExecutionModeDerivativeGroupQuadsNV = 5289, + SpvExecutionModeDerivativeGroupLinearNV = 5290, + SpvExecutionModeOutputTrianglesNV = 5298, + SpvExecutionModePixelInterlockOrderedEXT = 5366, + SpvExecutionModePixelInterlockUnorderedEXT = 5367, + SpvExecutionModeSampleInterlockOrderedEXT = 5368, + SpvExecutionModeSampleInterlockUnorderedEXT = 5369, + SpvExecutionModeShadingRateInterlockOrderedEXT = 5370, + SpvExecutionModeShadingRateInterlockUnorderedEXT = 5371, + SpvExecutionModeSharedLocalMemorySizeINTEL = 5618, + SpvExecutionModeRoundingModeRTPINTEL = 5620, + SpvExecutionModeRoundingModeRTNINTEL = 5621, + SpvExecutionModeFloatingPointModeALTINTEL = 5622, + SpvExecutionModeFloatingPointModeIEEEINTEL = 5623, + SpvExecutionModeMaxWorkgroupSizeINTEL = 5893, + SpvExecutionModeMaxWorkDimINTEL = 5894, + SpvExecutionModeNoGlobalOffsetINTEL = 5895, + SpvExecutionModeNumSIMDWorkitemsINTEL = 5896, + SpvExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + SpvExecutionModeMax = 0x7fffffff, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, + SpvStorageClassStorageBuffer = 12, + SpvStorageClassCallableDataKHR = 5328, + SpvStorageClassCallableDataNV = 5328, + SpvStorageClassIncomingCallableDataKHR = 5329, + SpvStorageClassIncomingCallableDataNV = 5329, + SpvStorageClassRayPayloadKHR = 5338, + SpvStorageClassRayPayloadNV = 5338, + SpvStorageClassHitAttributeKHR = 5339, + SpvStorageClassHitAttributeNV = 5339, + SpvStorageClassIncomingRayPayloadKHR = 5342, + SpvStorageClassIncomingRayPayloadNV = 5342, + 
SpvStorageClassShaderRecordBufferKHR = 5343, + SpvStorageClassShaderRecordBufferNV = 5343, + SpvStorageClassPhysicalStorageBuffer = 5349, + SpvStorageClassPhysicalStorageBufferEXT = 5349, + SpvStorageClassCodeSectionINTEL = 5605, + SpvStorageClassDeviceOnlyINTEL = 5936, + SpvStorageClassHostOnlyINTEL = 5937, + SpvStorageClassMax = 0x7fffffff, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, + SpvDimMax = 0x7fffffff, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, + SpvSamplerAddressingModeMax = 0x7fffffff, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, + SpvSamplerFilterModeMax = 0x7fffffff, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 
31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, + SpvImageFormatR64ui = 40, + SpvImageFormatR64i = 41, + SpvImageFormatMax = 0x7fffffff, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, + SpvImageChannelOrderABGR = 19, + SpvImageChannelOrderMax = 0x7fffffff, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, + SpvImageChannelDataTypeMax = 0x7fffffff, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + 
SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, + SpvImageOperandsMakeTexelAvailableShift = 8, + SpvImageOperandsMakeTexelAvailableKHRShift = 8, + SpvImageOperandsMakeTexelVisibleShift = 9, + SpvImageOperandsMakeTexelVisibleKHRShift = 9, + SpvImageOperandsNonPrivateTexelShift = 10, + SpvImageOperandsNonPrivateTexelKHRShift = 10, + SpvImageOperandsVolatileTexelShift = 11, + SpvImageOperandsVolatileTexelKHRShift = 11, + SpvImageOperandsSignExtendShift = 12, + SpvImageOperandsZeroExtendShift = 13, + SpvImageOperandsNontemporalShift = 14, + SpvImageOperandsOffsetsShift = 16, + SpvImageOperandsMax = 0x7fffffff, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, + SpvImageOperandsMakeTexelAvailableMask = 0x00000100, + SpvImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + SpvImageOperandsMakeTexelVisibleMask = 0x00000200, + SpvImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + SpvImageOperandsNonPrivateTexelMask = 0x00000400, + SpvImageOperandsNonPrivateTexelKHRMask = 0x00000400, + SpvImageOperandsVolatileTexelMask = 0x00000800, + SpvImageOperandsVolatileTexelKHRMask = 0x00000800, + SpvImageOperandsSignExtendMask = 0x00001000, + SpvImageOperandsZeroExtendMask = 0x00002000, + SpvImageOperandsNontemporalMask = 0x00004000, + SpvImageOperandsOffsetsMask = 0x00010000, +} SpvImageOperandsMask; + +typedef enum SpvFPFastMathModeShift_ { + 
SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, + SpvFPFastMathModeAllowContractFastINTELShift = 16, + SpvFPFastMathModeAllowReassocINTELShift = 17, + SpvFPFastMathModeMax = 0x7fffffff, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, + SpvFPFastMathModeAllowContractFastINTELMask = 0x00010000, + SpvFPFastMathModeAllowReassocINTELMask = 0x00020000, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, + SpvFPRoundingModeMax = 0x7fffffff, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, + SpvLinkageTypeLinkOnceODR = 2, + SpvLinkageTypeMax = 0x7fffffff, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, + SpvAccessQualifierMax = 0x7fffffff, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, + SpvFunctionParameterAttributeMax = 0x7fffffff, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + 
SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + SpvDecorationUniformId = 27, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, + SpvDecorationMaxByteOffset = 45, + SpvDecorationAlignmentId = 46, + SpvDecorationMaxByteOffsetId = 47, + SpvDecorationNoSignedWrap = 4469, + SpvDecorationNoUnsignedWrap = 4470, + SpvDecorationExplicitInterpAMD = 4999, + SpvDecorationOverrideCoverageNV = 5248, + SpvDecorationPassthroughNV = 5250, + SpvDecorationViewportRelativeNV = 5252, + SpvDecorationSecondaryViewportRelativeNV = 5256, + SpvDecorationPerPrimitiveNV = 5271, + SpvDecorationPerViewNV = 5272, + SpvDecorationPerTaskNV = 5273, + SpvDecorationPerVertexKHR = 5285, + SpvDecorationPerVertexNV = 5285, + SpvDecorationNonUniform = 5300, + SpvDecorationNonUniformEXT = 5300, + SpvDecorationRestrictPointer = 5355, + SpvDecorationRestrictPointerEXT = 5355, + 
SpvDecorationAliasedPointer = 5356, + SpvDecorationAliasedPointerEXT = 5356, + SpvDecorationBindlessSamplerNV = 5398, + SpvDecorationBindlessImageNV = 5399, + SpvDecorationBoundSamplerNV = 5400, + SpvDecorationBoundImageNV = 5401, + SpvDecorationSIMTCallINTEL = 5599, + SpvDecorationReferencedIndirectlyINTEL = 5602, + SpvDecorationClobberINTEL = 5607, + SpvDecorationSideEffectsINTEL = 5608, + SpvDecorationVectorComputeVariableINTEL = 5624, + SpvDecorationFuncParamIOKindINTEL = 5625, + SpvDecorationVectorComputeFunctionINTEL = 5626, + SpvDecorationStackCallINTEL = 5627, + SpvDecorationGlobalVariableOffsetINTEL = 5628, + SpvDecorationCounterBuffer = 5634, + SpvDecorationHlslCounterBufferGOOGLE = 5634, + SpvDecorationHlslSemanticGOOGLE = 5635, + SpvDecorationUserSemantic = 5635, + SpvDecorationUserTypeGOOGLE = 5636, + SpvDecorationFunctionRoundingModeINTEL = 5822, + SpvDecorationFunctionDenormModeINTEL = 5823, + SpvDecorationRegisterINTEL = 5825, + SpvDecorationMemoryINTEL = 5826, + SpvDecorationNumbanksINTEL = 5827, + SpvDecorationBankwidthINTEL = 5828, + SpvDecorationMaxPrivateCopiesINTEL = 5829, + SpvDecorationSinglepumpINTEL = 5830, + SpvDecorationDoublepumpINTEL = 5831, + SpvDecorationMaxReplicatesINTEL = 5832, + SpvDecorationSimpleDualPortINTEL = 5833, + SpvDecorationMergeINTEL = 5834, + SpvDecorationBankBitsINTEL = 5835, + SpvDecorationForcePow2DepthINTEL = 5836, + SpvDecorationBurstCoalesceINTEL = 5899, + SpvDecorationCacheSizeINTEL = 5900, + SpvDecorationDontStaticallyCoalesceINTEL = 5901, + SpvDecorationPrefetchINTEL = 5902, + SpvDecorationStallEnableINTEL = 5905, + SpvDecorationFuseLoopsInFunctionINTEL = 5907, + SpvDecorationAliasScopeINTEL = 5914, + SpvDecorationNoAliasINTEL = 5915, + SpvDecorationBufferLocationINTEL = 5921, + SpvDecorationIOPipeStorageINTEL = 5944, + SpvDecorationFunctionFloatingPointModeINTEL = 6080, + SpvDecorationSingleElementVectorINTEL = 6085, + SpvDecorationVectorComputeCallableFunctionINTEL = 6087, + SpvDecorationMediaBlockIOINTEL = 
6140, + SpvDecorationMax = 0x7fffffff, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, + SpvBuiltInSubgroupEqMask = 4416, + SpvBuiltInSubgroupEqMaskKHR = 4416, + SpvBuiltInSubgroupGeMask = 4417, + SpvBuiltInSubgroupGeMaskKHR = 4417, + SpvBuiltInSubgroupGtMask = 4418, + SpvBuiltInSubgroupGtMaskKHR = 4418, + SpvBuiltInSubgroupLeMask = 4419, + SpvBuiltInSubgroupLeMaskKHR = 4419, + SpvBuiltInSubgroupLtMask = 4420, + SpvBuiltInSubgroupLtMaskKHR = 4420, + SpvBuiltInBaseVertex = 4424, + SpvBuiltInBaseInstance = 4425, + SpvBuiltInDrawIndex = 4426, + SpvBuiltInPrimitiveShadingRateKHR = 4432, + SpvBuiltInDeviceIndex = 4438, + SpvBuiltInViewIndex = 4440, + SpvBuiltInShadingRateKHR = 4444, + SpvBuiltInBaryCoordNoPerspAMD 
= 4992, + SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993, + SpvBuiltInBaryCoordNoPerspSampleAMD = 4994, + SpvBuiltInBaryCoordSmoothAMD = 4995, + SpvBuiltInBaryCoordSmoothCentroidAMD = 4996, + SpvBuiltInBaryCoordSmoothSampleAMD = 4997, + SpvBuiltInBaryCoordPullModelAMD = 4998, + SpvBuiltInFragStencilRefEXT = 5014, + SpvBuiltInViewportMaskNV = 5253, + SpvBuiltInSecondaryPositionNV = 5257, + SpvBuiltInSecondaryViewportMaskNV = 5258, + SpvBuiltInPositionPerViewNV = 5261, + SpvBuiltInViewportMaskPerViewNV = 5262, + SpvBuiltInFullyCoveredEXT = 5264, + SpvBuiltInTaskCountNV = 5274, + SpvBuiltInPrimitiveCountNV = 5275, + SpvBuiltInPrimitiveIndicesNV = 5276, + SpvBuiltInClipDistancePerViewNV = 5277, + SpvBuiltInCullDistancePerViewNV = 5278, + SpvBuiltInLayerPerViewNV = 5279, + SpvBuiltInMeshViewCountNV = 5280, + SpvBuiltInMeshViewIndicesNV = 5281, + SpvBuiltInBaryCoordKHR = 5286, + SpvBuiltInBaryCoordNV = 5286, + SpvBuiltInBaryCoordNoPerspKHR = 5287, + SpvBuiltInBaryCoordNoPerspNV = 5287, + SpvBuiltInFragSizeEXT = 5292, + SpvBuiltInFragmentSizeNV = 5292, + SpvBuiltInFragInvocationCountEXT = 5293, + SpvBuiltInInvocationsPerPixelNV = 5293, + SpvBuiltInLaunchIdKHR = 5319, + SpvBuiltInLaunchIdNV = 5319, + SpvBuiltInLaunchSizeKHR = 5320, + SpvBuiltInLaunchSizeNV = 5320, + SpvBuiltInWorldRayOriginKHR = 5321, + SpvBuiltInWorldRayOriginNV = 5321, + SpvBuiltInWorldRayDirectionKHR = 5322, + SpvBuiltInWorldRayDirectionNV = 5322, + SpvBuiltInObjectRayOriginKHR = 5323, + SpvBuiltInObjectRayOriginNV = 5323, + SpvBuiltInObjectRayDirectionKHR = 5324, + SpvBuiltInObjectRayDirectionNV = 5324, + SpvBuiltInRayTminKHR = 5325, + SpvBuiltInRayTminNV = 5325, + SpvBuiltInRayTmaxKHR = 5326, + SpvBuiltInRayTmaxNV = 5326, + SpvBuiltInInstanceCustomIndexKHR = 5327, + SpvBuiltInInstanceCustomIndexNV = 5327, + SpvBuiltInObjectToWorldKHR = 5330, + SpvBuiltInObjectToWorldNV = 5330, + SpvBuiltInWorldToObjectKHR = 5331, + SpvBuiltInWorldToObjectNV = 5331, + SpvBuiltInHitTNV = 5332, + SpvBuiltInHitKindKHR = 
5333, + SpvBuiltInHitKindNV = 5333, + SpvBuiltInCurrentRayTimeNV = 5334, + SpvBuiltInIncomingRayFlagsKHR = 5351, + SpvBuiltInIncomingRayFlagsNV = 5351, + SpvBuiltInRayGeometryIndexKHR = 5352, + SpvBuiltInWarpsPerSMNV = 5374, + SpvBuiltInSMCountNV = 5375, + SpvBuiltInWarpIDNV = 5376, + SpvBuiltInSMIDNV = 5377, + SpvBuiltInMax = 0x7fffffff, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, + SpvSelectionControlMax = 0x7fffffff, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, + SpvLoopControlDependencyInfiniteShift = 2, + SpvLoopControlDependencyLengthShift = 3, + SpvLoopControlMinIterationsShift = 4, + SpvLoopControlMaxIterationsShift = 5, + SpvLoopControlIterationMultipleShift = 6, + SpvLoopControlPeelCountShift = 7, + SpvLoopControlPartialCountShift = 8, + SpvLoopControlInitiationIntervalINTELShift = 16, + SpvLoopControlMaxConcurrencyINTELShift = 17, + SpvLoopControlDependencyArrayINTELShift = 18, + SpvLoopControlPipelineEnableINTELShift = 19, + SpvLoopControlLoopCoalesceINTELShift = 20, + SpvLoopControlMaxInterleavingINTELShift = 21, + SpvLoopControlSpeculatedIterationsINTELShift = 22, + SpvLoopControlNoFusionINTELShift = 23, + SpvLoopControlMax = 0x7fffffff, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, + SpvLoopControlDependencyInfiniteMask = 0x00000004, + SpvLoopControlDependencyLengthMask = 0x00000008, + SpvLoopControlMinIterationsMask = 0x00000010, + SpvLoopControlMaxIterationsMask = 0x00000020, + SpvLoopControlIterationMultipleMask = 0x00000040, + 
SpvLoopControlPeelCountMask = 0x00000080, + SpvLoopControlPartialCountMask = 0x00000100, + SpvLoopControlInitiationIntervalINTELMask = 0x00010000, + SpvLoopControlMaxConcurrencyINTELMask = 0x00020000, + SpvLoopControlDependencyArrayINTELMask = 0x00040000, + SpvLoopControlPipelineEnableINTELMask = 0x00080000, + SpvLoopControlLoopCoalesceINTELMask = 0x00100000, + SpvLoopControlMaxInterleavingINTELMask = 0x00200000, + SpvLoopControlSpeculatedIterationsINTELMask = 0x00400000, + SpvLoopControlNoFusionINTELMask = 0x00800000, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, + SpvFunctionControlOptNoneINTELShift = 16, + SpvFunctionControlMax = 0x7fffffff, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, + SpvFunctionControlOptNoneINTELMask = 0x00010000, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, + SpvMemorySemanticsOutputMemoryShift = 12, + SpvMemorySemanticsOutputMemoryKHRShift = 12, + SpvMemorySemanticsMakeAvailableShift = 13, + SpvMemorySemanticsMakeAvailableKHRShift = 13, + SpvMemorySemanticsMakeVisibleShift = 14, + SpvMemorySemanticsMakeVisibleKHRShift = 14, + SpvMemorySemanticsVolatileShift = 15, + 
SpvMemorySemanticsMax = 0x7fffffff, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, + SpvMemorySemanticsOutputMemoryMask = 0x00001000, + SpvMemorySemanticsOutputMemoryKHRMask = 0x00001000, + SpvMemorySemanticsMakeAvailableMask = 0x00002000, + SpvMemorySemanticsMakeAvailableKHRMask = 0x00002000, + SpvMemorySemanticsMakeVisibleMask = 0x00004000, + SpvMemorySemanticsMakeVisibleKHRMask = 0x00004000, + SpvMemorySemanticsVolatileMask = 0x00008000, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, + SpvMemoryAccessMakePointerAvailableShift = 3, + SpvMemoryAccessMakePointerAvailableKHRShift = 3, + SpvMemoryAccessMakePointerVisibleShift = 4, + SpvMemoryAccessMakePointerVisibleKHRShift = 4, + SpvMemoryAccessNonPrivatePointerShift = 5, + SpvMemoryAccessNonPrivatePointerKHRShift = 5, + SpvMemoryAccessAliasScopeINTELMaskShift = 16, + SpvMemoryAccessNoAliasINTELMaskShift = 17, + SpvMemoryAccessMax = 0x7fffffff, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, + SpvMemoryAccessMakePointerAvailableMask = 0x00000008, + SpvMemoryAccessMakePointerAvailableKHRMask = 0x00000008, + 
SpvMemoryAccessMakePointerVisibleMask = 0x00000010, + SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010, + SpvMemoryAccessNonPrivatePointerMask = 0x00000020, + SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020, + SpvMemoryAccessAliasScopeINTELMaskMask = 0x00010000, + SpvMemoryAccessNoAliasINTELMaskMask = 0x00020000, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, + SpvScopeQueueFamily = 5, + SpvScopeQueueFamilyKHR = 5, + SpvScopeShaderCallKHR = 6, + SpvScopeMax = 0x7fffffff, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, + SpvGroupOperationClusteredReduce = 3, + SpvGroupOperationPartitionedReduceNV = 6, + SpvGroupOperationPartitionedInclusiveScanNV = 7, + SpvGroupOperationPartitionedExclusiveScanNV = 8, + SpvGroupOperationMax = 0x7fffffff, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, + SpvKernelEnqueueFlagsMax = 0x7fffffff, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, + SpvKernelProfilingInfoMax = 0x7fffffff, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + 
SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityStorageImageExtendedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, + SpvCapabilitySubgroupDispatch = 58, + SpvCapabilityNamedBarrier = 59, + SpvCapabilityPipeStorage = 60, + SpvCapabilityGroupNonUniform = 61, + SpvCapabilityGroupNonUniformVote = 62, + SpvCapabilityGroupNonUniformArithmetic = 63, + SpvCapabilityGroupNonUniformBallot = 64, + SpvCapabilityGroupNonUniformShuffle = 65, + SpvCapabilityGroupNonUniformShuffleRelative = 66, + 
SpvCapabilityGroupNonUniformClustered = 67, + SpvCapabilityGroupNonUniformQuad = 68, + SpvCapabilityShaderLayer = 69, + SpvCapabilityShaderViewportIndex = 70, + SpvCapabilityUniformDecoration = 71, + SpvCapabilityFragmentShadingRateKHR = 4422, + SpvCapabilitySubgroupBallotKHR = 4423, + SpvCapabilityDrawParameters = 4427, + SpvCapabilityWorkgroupMemoryExplicitLayoutKHR = 4428, + SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, + SpvCapabilitySubgroupVoteKHR = 4431, + SpvCapabilityStorageBuffer16BitAccess = 4433, + SpvCapabilityStorageUniformBufferBlock16 = 4433, + SpvCapabilityStorageUniform16 = 4434, + SpvCapabilityUniformAndStorageBuffer16BitAccess = 4434, + SpvCapabilityStoragePushConstant16 = 4435, + SpvCapabilityStorageInputOutput16 = 4436, + SpvCapabilityDeviceGroup = 4437, + SpvCapabilityMultiView = 4439, + SpvCapabilityVariablePointersStorageBuffer = 4441, + SpvCapabilityVariablePointers = 4442, + SpvCapabilityAtomicStorageOps = 4445, + SpvCapabilitySampleMaskPostDepthCoverage = 4447, + SpvCapabilityStorageBuffer8BitAccess = 4448, + SpvCapabilityUniformAndStorageBuffer8BitAccess = 4449, + SpvCapabilityStoragePushConstant8 = 4450, + SpvCapabilityDenormPreserve = 4464, + SpvCapabilityDenormFlushToZero = 4465, + SpvCapabilitySignedZeroInfNanPreserve = 4466, + SpvCapabilityRoundingModeRTE = 4467, + SpvCapabilityRoundingModeRTZ = 4468, + SpvCapabilityRayQueryProvisionalKHR = 4471, + SpvCapabilityRayQueryKHR = 4472, + SpvCapabilityRayTraversalPrimitiveCullingKHR = 4478, + SpvCapabilityRayTracingKHR = 4479, + SpvCapabilityFloat16ImageAMD = 5008, + SpvCapabilityImageGatherBiasLodAMD = 5009, + SpvCapabilityFragmentMaskAMD = 5010, + SpvCapabilityStencilExportEXT = 5013, + SpvCapabilityImageReadWriteLodAMD = 5015, + SpvCapabilityInt64ImageEXT = 5016, + SpvCapabilityShaderClockKHR = 5055, + SpvCapabilitySampleMaskOverrideCoverageNV = 5249, + SpvCapabilityGeometryShaderPassthroughNV = 5251, 
+ SpvCapabilityShaderViewportIndexLayerEXT = 5254, + SpvCapabilityShaderViewportIndexLayerNV = 5254, + SpvCapabilityShaderViewportMaskNV = 5255, + SpvCapabilityShaderStereoViewNV = 5259, + SpvCapabilityPerViewAttributesNV = 5260, + SpvCapabilityFragmentFullyCoveredEXT = 5265, + SpvCapabilityMeshShadingNV = 5266, + SpvCapabilityImageFootprintNV = 5282, + SpvCapabilityFragmentBarycentricKHR = 5284, + SpvCapabilityFragmentBarycentricNV = 5284, + SpvCapabilityComputeDerivativeGroupQuadsNV = 5288, + SpvCapabilityFragmentDensityEXT = 5291, + SpvCapabilityShadingRateNV = 5291, + SpvCapabilityGroupNonUniformPartitionedNV = 5297, + SpvCapabilityShaderNonUniform = 5301, + SpvCapabilityShaderNonUniformEXT = 5301, + SpvCapabilityRuntimeDescriptorArray = 5302, + SpvCapabilityRuntimeDescriptorArrayEXT = 5302, + SpvCapabilityInputAttachmentArrayDynamicIndexing = 5303, + SpvCapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + SpvCapabilityUniformTexelBufferArrayDynamicIndexing = 5304, + SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + SpvCapabilityStorageTexelBufferArrayDynamicIndexing = 5305, + SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + SpvCapabilityUniformBufferArrayNonUniformIndexing = 5306, + SpvCapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + SpvCapabilitySampledImageArrayNonUniformIndexing = 5307, + SpvCapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + SpvCapabilityStorageBufferArrayNonUniformIndexing = 5308, + SpvCapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + SpvCapabilityStorageImageArrayNonUniformIndexing = 5309, + SpvCapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + SpvCapabilityInputAttachmentArrayNonUniformIndexing = 5310, + SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + SpvCapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, + 
SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + SpvCapabilityRayTracingNV = 5340, + SpvCapabilityRayTracingMotionBlurNV = 5341, + SpvCapabilityVulkanMemoryModel = 5345, + SpvCapabilityVulkanMemoryModelKHR = 5345, + SpvCapabilityVulkanMemoryModelDeviceScope = 5346, + SpvCapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + SpvCapabilityPhysicalStorageBufferAddresses = 5347, + SpvCapabilityPhysicalStorageBufferAddressesEXT = 5347, + SpvCapabilityComputeDerivativeGroupLinearNV = 5350, + SpvCapabilityRayTracingProvisionalKHR = 5353, + SpvCapabilityCooperativeMatrixNV = 5357, + SpvCapabilityFragmentShaderSampleInterlockEXT = 5363, + SpvCapabilityFragmentShaderShadingRateInterlockEXT = 5372, + SpvCapabilityShaderSMBuiltinsNV = 5373, + SpvCapabilityFragmentShaderPixelInterlockEXT = 5378, + SpvCapabilityDemoteToHelperInvocation = 5379, + SpvCapabilityDemoteToHelperInvocationEXT = 5379, + SpvCapabilityBindlessTextureNV = 5390, + SpvCapabilitySubgroupShuffleINTEL = 5568, + SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, + SpvCapabilitySubgroupImageBlockIOINTEL = 5570, + SpvCapabilitySubgroupImageMediaBlockIOINTEL = 5579, + SpvCapabilityRoundToInfinityINTEL = 5582, + SpvCapabilityFloatingPointModeINTEL = 5583, + SpvCapabilityIntegerFunctions2INTEL = 5584, + SpvCapabilityFunctionPointersINTEL = 5603, + SpvCapabilityIndirectReferencesINTEL = 5604, + SpvCapabilityAsmINTEL = 5606, + SpvCapabilityAtomicFloat32MinMaxEXT = 5612, + SpvCapabilityAtomicFloat64MinMaxEXT = 5613, + SpvCapabilityAtomicFloat16MinMaxEXT = 5616, + SpvCapabilityVectorComputeINTEL = 5617, + SpvCapabilityVectorAnyINTEL = 5619, + SpvCapabilityExpectAssumeKHR = 5629, + SpvCapabilitySubgroupAvcMotionEstimationINTEL = 5696, + SpvCapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + SpvCapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + SpvCapabilityVariableLengthArrayINTEL = 5817, + SpvCapabilityFunctionFloatControlINTEL = 5821, + SpvCapabilityFPGAMemoryAttributesINTEL = 5824, + 
SpvCapabilityFPFastMathModeINTEL = 5837, + SpvCapabilityArbitraryPrecisionIntegersINTEL = 5844, + SpvCapabilityArbitraryPrecisionFloatingPointINTEL = 5845, + SpvCapabilityUnstructuredLoopControlsINTEL = 5886, + SpvCapabilityFPGALoopControlsINTEL = 5888, + SpvCapabilityKernelAttributesINTEL = 5892, + SpvCapabilityFPGAKernelAttributesINTEL = 5897, + SpvCapabilityFPGAMemoryAccessesINTEL = 5898, + SpvCapabilityFPGAClusterAttributesINTEL = 5904, + SpvCapabilityLoopFuseINTEL = 5906, + SpvCapabilityMemoryAccessAliasingINTEL = 5910, + SpvCapabilityFPGABufferLocationINTEL = 5920, + SpvCapabilityArbitraryPrecisionFixedPointINTEL = 5922, + SpvCapabilityUSMStorageClassesINTEL = 5935, + SpvCapabilityIOPipesINTEL = 5943, + SpvCapabilityBlockingPipesINTEL = 5945, + SpvCapabilityFPGARegINTEL = 5948, + SpvCapabilityDotProductInputAll = 6016, + SpvCapabilityDotProductInputAllKHR = 6016, + SpvCapabilityDotProductInput4x8Bit = 6017, + SpvCapabilityDotProductInput4x8BitKHR = 6017, + SpvCapabilityDotProductInput4x8BitPacked = 6018, + SpvCapabilityDotProductInput4x8BitPackedKHR = 6018, + SpvCapabilityDotProduct = 6019, + SpvCapabilityDotProductKHR = 6019, + SpvCapabilityBitInstructions = 6025, + SpvCapabilityAtomicFloat32AddEXT = 6033, + SpvCapabilityAtomicFloat64AddEXT = 6034, + SpvCapabilityLongConstantCompositeINTEL = 6089, + SpvCapabilityOptNoneINTEL = 6094, + SpvCapabilityAtomicFloat16AddEXT = 6095, + SpvCapabilityDebugInfoModuleINTEL = 6114, + SpvCapabilityMax = 0x7fffffff, +} SpvCapability; + +typedef enum SpvRayFlagsShift_ { + SpvRayFlagsOpaqueKHRShift = 0, + SpvRayFlagsNoOpaqueKHRShift = 1, + SpvRayFlagsTerminateOnFirstHitKHRShift = 2, + SpvRayFlagsSkipClosestHitShaderKHRShift = 3, + SpvRayFlagsCullBackFacingTrianglesKHRShift = 4, + SpvRayFlagsCullFrontFacingTrianglesKHRShift = 5, + SpvRayFlagsCullOpaqueKHRShift = 6, + SpvRayFlagsCullNoOpaqueKHRShift = 7, + SpvRayFlagsSkipTrianglesKHRShift = 8, + SpvRayFlagsSkipAABBsKHRShift = 9, + SpvRayFlagsMax = 0x7fffffff, +} 
SpvRayFlagsShift; + +typedef enum SpvRayFlagsMask_ { + SpvRayFlagsMaskNone = 0, + SpvRayFlagsOpaqueKHRMask = 0x00000001, + SpvRayFlagsNoOpaqueKHRMask = 0x00000002, + SpvRayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + SpvRayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + SpvRayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + SpvRayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + SpvRayFlagsCullOpaqueKHRMask = 0x00000040, + SpvRayFlagsCullNoOpaqueKHRMask = 0x00000080, + SpvRayFlagsSkipTrianglesKHRMask = 0x00000100, + SpvRayFlagsSkipAABBsKHRMask = 0x00000200, +} SpvRayFlagsMask; + +typedef enum SpvRayQueryIntersection_ { + SpvRayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + SpvRayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + SpvRayQueryIntersectionMax = 0x7fffffff, +} SpvRayQueryIntersection; + +typedef enum SpvRayQueryCommittedIntersectionType_ { + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + SpvRayQueryCommittedIntersectionTypeMax = 0x7fffffff, +} SpvRayQueryCommittedIntersectionType; + +typedef enum SpvRayQueryCandidateIntersectionType_ { + SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + SpvRayQueryCandidateIntersectionTypeMax = 0x7fffffff, +} SpvRayQueryCandidateIntersectionType; + +typedef enum SpvFragmentShadingRateShift_ { + SpvFragmentShadingRateVertical2PixelsShift = 0, + SpvFragmentShadingRateVertical4PixelsShift = 1, + SpvFragmentShadingRateHorizontal2PixelsShift = 2, + SpvFragmentShadingRateHorizontal4PixelsShift = 3, + SpvFragmentShadingRateMax = 0x7fffffff, +} SpvFragmentShadingRateShift; + +typedef enum SpvFragmentShadingRateMask_ { + SpvFragmentShadingRateMaskNone = 0, + 
SpvFragmentShadingRateVertical2PixelsMask = 0x00000001, + SpvFragmentShadingRateVertical4PixelsMask = 0x00000002, + SpvFragmentShadingRateHorizontal2PixelsMask = 0x00000004, + SpvFragmentShadingRateHorizontal4PixelsMask = 0x00000008, +} SpvFragmentShadingRateMask; + +typedef enum SpvFPDenormMode_ { + SpvFPDenormModePreserve = 0, + SpvFPDenormModeFlushToZero = 1, + SpvFPDenormModeMax = 0x7fffffff, +} SpvFPDenormMode; + +typedef enum SpvFPOperationMode_ { + SpvFPOperationModeIEEE = 0, + SpvFPOperationModeALT = 1, + SpvFPOperationModeMax = 0x7fffffff, +} SpvFPOperationMode; + +typedef enum SpvQuantizationModes_ { + SpvQuantizationModesTRN = 0, + SpvQuantizationModesTRN_ZERO = 1, + SpvQuantizationModesRND = 2, + SpvQuantizationModesRND_ZERO = 3, + SpvQuantizationModesRND_INF = 4, + SpvQuantizationModesRND_MIN_INF = 5, + SpvQuantizationModesRND_CONV = 6, + SpvQuantizationModesRND_CONV_ODD = 7, + SpvQuantizationModesMax = 0x7fffffff, +} SpvQuantizationModes; + +typedef enum SpvOverflowModes_ { + SpvOverflowModesWRAP = 0, + SpvOverflowModesSAT = 1, + SpvOverflowModesSAT_ZERO = 2, + SpvOverflowModesSAT_SYM = 3, + SpvOverflowModesMax = 0x7fffffff, +} SpvOverflowModes; + +typedef enum SpvPackedVectorFormat_ { + SpvPackedVectorFormatPackedVectorFormat4x8Bit = 0, + SpvPackedVectorFormatPackedVectorFormat4x8BitKHR = 0, + SpvPackedVectorFormatMax = 0x7fffffff, +} SpvPackedVectorFormat; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + 
SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 
103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + 
SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe 
= 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, + SpvOpImageSparseRead = 320, + SpvOpSizeOf = 321, + SpvOpTypePipeStorage = 322, + SpvOpConstantPipeStorage = 323, + SpvOpCreatePipeFromPipeStorage = 324, + SpvOpGetKernelLocalSizeForSubgroupCount = 325, + SpvOpGetKernelMaxNumSubgroups = 326, + SpvOpTypeNamedBarrier = 327, + SpvOpNamedBarrierInitialize = 328, + SpvOpMemoryNamedBarrier = 329, + SpvOpModuleProcessed = 330, + SpvOpExecutionModeId = 331, + SpvOpDecorateId = 332, + 
SpvOpGroupNonUniformElect = 333, + SpvOpGroupNonUniformAll = 334, + SpvOpGroupNonUniformAny = 335, + SpvOpGroupNonUniformAllEqual = 336, + SpvOpGroupNonUniformBroadcast = 337, + SpvOpGroupNonUniformBroadcastFirst = 338, + SpvOpGroupNonUniformBallot = 339, + SpvOpGroupNonUniformInverseBallot = 340, + SpvOpGroupNonUniformBallotBitExtract = 341, + SpvOpGroupNonUniformBallotBitCount = 342, + SpvOpGroupNonUniformBallotFindLSB = 343, + SpvOpGroupNonUniformBallotFindMSB = 344, + SpvOpGroupNonUniformShuffle = 345, + SpvOpGroupNonUniformShuffleXor = 346, + SpvOpGroupNonUniformShuffleUp = 347, + SpvOpGroupNonUniformShuffleDown = 348, + SpvOpGroupNonUniformIAdd = 349, + SpvOpGroupNonUniformFAdd = 350, + SpvOpGroupNonUniformIMul = 351, + SpvOpGroupNonUniformFMul = 352, + SpvOpGroupNonUniformSMin = 353, + SpvOpGroupNonUniformUMin = 354, + SpvOpGroupNonUniformFMin = 355, + SpvOpGroupNonUniformSMax = 356, + SpvOpGroupNonUniformUMax = 357, + SpvOpGroupNonUniformFMax = 358, + SpvOpGroupNonUniformBitwiseAnd = 359, + SpvOpGroupNonUniformBitwiseOr = 360, + SpvOpGroupNonUniformBitwiseXor = 361, + SpvOpGroupNonUniformLogicalAnd = 362, + SpvOpGroupNonUniformLogicalOr = 363, + SpvOpGroupNonUniformLogicalXor = 364, + SpvOpGroupNonUniformQuadBroadcast = 365, + SpvOpGroupNonUniformQuadSwap = 366, + SpvOpCopyLogical = 400, + SpvOpPtrEqual = 401, + SpvOpPtrNotEqual = 402, + SpvOpPtrDiff = 403, + SpvOpTerminateInvocation = 4416, + SpvOpSubgroupBallotKHR = 4421, + SpvOpSubgroupFirstInvocationKHR = 4422, + SpvOpSubgroupAllKHR = 4428, + SpvOpSubgroupAnyKHR = 4429, + SpvOpSubgroupAllEqualKHR = 4430, + SpvOpSubgroupReadInvocationKHR = 4432, + SpvOpTraceRayKHR = 4445, + SpvOpExecuteCallableKHR = 4446, + SpvOpConvertUToAccelerationStructureKHR = 4447, + SpvOpIgnoreIntersectionKHR = 4448, + SpvOpTerminateRayKHR = 4449, + SpvOpSDot = 4450, + SpvOpSDotKHR = 4450, + SpvOpUDot = 4451, + SpvOpUDotKHR = 4451, + SpvOpSUDot = 4452, + SpvOpSUDotKHR = 4452, + SpvOpSDotAccSat = 4453, + SpvOpSDotAccSatKHR = 4453, 
+ SpvOpUDotAccSat = 4454, + SpvOpUDotAccSatKHR = 4454, + SpvOpSUDotAccSat = 4455, + SpvOpSUDotAccSatKHR = 4455, + SpvOpTypeRayQueryKHR = 4472, + SpvOpRayQueryInitializeKHR = 4473, + SpvOpRayQueryTerminateKHR = 4474, + SpvOpRayQueryGenerateIntersectionKHR = 4475, + SpvOpRayQueryConfirmIntersectionKHR = 4476, + SpvOpRayQueryProceedKHR = 4477, + SpvOpRayQueryGetIntersectionTypeKHR = 4479, + SpvOpGroupIAddNonUniformAMD = 5000, + SpvOpGroupFAddNonUniformAMD = 5001, + SpvOpGroupFMinNonUniformAMD = 5002, + SpvOpGroupUMinNonUniformAMD = 5003, + SpvOpGroupSMinNonUniformAMD = 5004, + SpvOpGroupFMaxNonUniformAMD = 5005, + SpvOpGroupUMaxNonUniformAMD = 5006, + SpvOpGroupSMaxNonUniformAMD = 5007, + SpvOpFragmentMaskFetchAMD = 5011, + SpvOpFragmentFetchAMD = 5012, + SpvOpReadClockKHR = 5056, + SpvOpImageSampleFootprintNV = 5283, + SpvOpGroupNonUniformPartitionNV = 5296, + SpvOpWritePackedPrimitiveIndices4x8NV = 5299, + SpvOpReportIntersectionKHR = 5334, + SpvOpReportIntersectionNV = 5334, + SpvOpIgnoreIntersectionNV = 5335, + SpvOpTerminateRayNV = 5336, + SpvOpTraceNV = 5337, + SpvOpTraceMotionNV = 5338, + SpvOpTraceRayMotionNV = 5339, + SpvOpTypeAccelerationStructureKHR = 5341, + SpvOpTypeAccelerationStructureNV = 5341, + SpvOpExecuteCallableNV = 5344, + SpvOpTypeCooperativeMatrixNV = 5358, + SpvOpCooperativeMatrixLoadNV = 5359, + SpvOpCooperativeMatrixStoreNV = 5360, + SpvOpCooperativeMatrixMulAddNV = 5361, + SpvOpCooperativeMatrixLengthNV = 5362, + SpvOpBeginInvocationInterlockEXT = 5364, + SpvOpEndInvocationInterlockEXT = 5365, + SpvOpDemoteToHelperInvocation = 5380, + SpvOpDemoteToHelperInvocationEXT = 5380, + SpvOpIsHelperInvocationEXT = 5381, + SpvOpConvertUToImageNV = 5391, + SpvOpConvertUToSamplerNV = 5392, + SpvOpConvertImageToUNV = 5393, + SpvOpConvertSamplerToUNV = 5394, + SpvOpConvertUToSampledImageNV = 5395, + SpvOpConvertSampledImageToUNV = 5396, + SpvOpSamplerImageAddressingModeNV = 5397, + SpvOpSubgroupShuffleINTEL = 5571, + SpvOpSubgroupShuffleDownINTEL = 5572, 
+ SpvOpSubgroupShuffleUpINTEL = 5573, + SpvOpSubgroupShuffleXorINTEL = 5574, + SpvOpSubgroupBlockReadINTEL = 5575, + SpvOpSubgroupBlockWriteINTEL = 5576, + SpvOpSubgroupImageBlockReadINTEL = 5577, + SpvOpSubgroupImageBlockWriteINTEL = 5578, + SpvOpSubgroupImageMediaBlockReadINTEL = 5580, + SpvOpSubgroupImageMediaBlockWriteINTEL = 5581, + SpvOpUCountLeadingZerosINTEL = 5585, + SpvOpUCountTrailingZerosINTEL = 5586, + SpvOpAbsISubINTEL = 5587, + SpvOpAbsUSubINTEL = 5588, + SpvOpIAddSatINTEL = 5589, + SpvOpUAddSatINTEL = 5590, + SpvOpIAverageINTEL = 5591, + SpvOpUAverageINTEL = 5592, + SpvOpIAverageRoundedINTEL = 5593, + SpvOpUAverageRoundedINTEL = 5594, + SpvOpISubSatINTEL = 5595, + SpvOpUSubSatINTEL = 5596, + SpvOpIMul32x16INTEL = 5597, + SpvOpUMul32x16INTEL = 5598, + SpvOpConstantFunctionPointerINTEL = 5600, + SpvOpFunctionPointerCallINTEL = 5601, + SpvOpAsmTargetINTEL = 5609, + SpvOpAsmINTEL = 5610, + SpvOpAsmCallINTEL = 5611, + SpvOpAtomicFMinEXT = 5614, + SpvOpAtomicFMaxEXT = 5615, + SpvOpAssumeTrueKHR = 5630, + SpvOpExpectKHR = 5631, + SpvOpDecorateString = 5632, + SpvOpDecorateStringGOOGLE = 5632, + SpvOpMemberDecorateString = 5633, + SpvOpMemberDecorateStringGOOGLE = 5633, + SpvOpVmeImageINTEL = 5699, + SpvOpTypeVmeImageINTEL = 5700, + SpvOpTypeAvcImePayloadINTEL = 5701, + SpvOpTypeAvcRefPayloadINTEL = 5702, + SpvOpTypeAvcSicPayloadINTEL = 5703, + SpvOpTypeAvcMcePayloadINTEL = 5704, + SpvOpTypeAvcMceResultINTEL = 5705, + SpvOpTypeAvcImeResultINTEL = 5706, + SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + SpvOpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + SpvOpTypeAvcImeDualReferenceStreaminINTEL = 5710, + SpvOpTypeAvcRefResultINTEL = 5711, + SpvOpTypeAvcSicResultINTEL = 5712, + SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + 
SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + SpvOpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + SpvOpSubgroupAvcMceConvertToImeResultINTEL = 5733, + SpvOpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + SpvOpSubgroupAvcMceConvertToRefResultINTEL = 5735, + SpvOpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + SpvOpSubgroupAvcMceConvertToSicResultINTEL = 5737, + SpvOpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + SpvOpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + SpvOpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + SpvOpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + SpvOpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + SpvOpSubgroupAvcImeInitializeINTEL = 5747, + SpvOpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + 
SpvOpSubgroupAvcImeSetDualReferenceINTEL = 5749, + SpvOpSubgroupAvcImeRefWindowSizeINTEL = 5750, + SpvOpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + SpvOpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + SpvOpSubgroupAvcImeSetWeightedSadINTEL = 5756, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + SpvOpSubgroupAvcImeConvertToMceResultINTEL = 5765, + SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + SpvOpSubgroupAvcImeGetBorderReachedINTEL = 5776, + SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + 
SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + SpvOpSubgroupAvcFmeInitializeINTEL = 5781, + SpvOpSubgroupAvcBmeInitializeINTEL = 5782, + SpvOpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + SpvOpSubgroupAvcRefConvertToMceResultINTEL = 5790, + SpvOpSubgroupAvcSicInitializeINTEL = 5791, + SpvOpSubgroupAvcSicConfigureSkcINTEL = 5792, + SpvOpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + SpvOpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + SpvOpSubgroupAvcSicEvaluateIpeINTEL = 5803, + SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + SpvOpSubgroupAvcSicConvertToMceResultINTEL = 5808, + SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + 
SpvOpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + SpvOpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + SpvOpVariableLengthArrayINTEL = 5818, + SpvOpSaveMemoryINTEL = 5819, + SpvOpRestoreMemoryINTEL = 5820, + SpvOpArbitraryFloatSinCosPiINTEL = 5840, + SpvOpArbitraryFloatCastINTEL = 5841, + SpvOpArbitraryFloatCastFromIntINTEL = 5842, + SpvOpArbitraryFloatCastToIntINTEL = 5843, + SpvOpArbitraryFloatAddINTEL = 5846, + SpvOpArbitraryFloatSubINTEL = 5847, + SpvOpArbitraryFloatMulINTEL = 5848, + SpvOpArbitraryFloatDivINTEL = 5849, + SpvOpArbitraryFloatGTINTEL = 5850, + SpvOpArbitraryFloatGEINTEL = 5851, + SpvOpArbitraryFloatLTINTEL = 5852, + SpvOpArbitraryFloatLEINTEL = 5853, + SpvOpArbitraryFloatEQINTEL = 5854, + SpvOpArbitraryFloatRecipINTEL = 5855, + SpvOpArbitraryFloatRSqrtINTEL = 5856, + SpvOpArbitraryFloatCbrtINTEL = 5857, + SpvOpArbitraryFloatHypotINTEL = 5858, + SpvOpArbitraryFloatSqrtINTEL = 5859, + SpvOpArbitraryFloatLogINTEL = 5860, + SpvOpArbitraryFloatLog2INTEL = 5861, + SpvOpArbitraryFloatLog10INTEL = 5862, + SpvOpArbitraryFloatLog1pINTEL = 5863, + SpvOpArbitraryFloatExpINTEL = 5864, + SpvOpArbitraryFloatExp2INTEL = 5865, + SpvOpArbitraryFloatExp10INTEL = 5866, + SpvOpArbitraryFloatExpm1INTEL = 5867, + SpvOpArbitraryFloatSinINTEL = 5868, + SpvOpArbitraryFloatCosINTEL = 5869, + SpvOpArbitraryFloatSinCosINTEL = 5870, + SpvOpArbitraryFloatSinPiINTEL = 5871, + SpvOpArbitraryFloatCosPiINTEL = 5872, + SpvOpArbitraryFloatASinINTEL = 5873, + SpvOpArbitraryFloatASinPiINTEL = 5874, + SpvOpArbitraryFloatACosINTEL = 5875, + SpvOpArbitraryFloatACosPiINTEL = 5876, + SpvOpArbitraryFloatATanINTEL = 5877, + SpvOpArbitraryFloatATanPiINTEL = 5878, + SpvOpArbitraryFloatATan2INTEL = 5879, + SpvOpArbitraryFloatPowINTEL = 5880, + SpvOpArbitraryFloatPowRINTEL = 5881, + SpvOpArbitraryFloatPowNINTEL = 5882, + SpvOpLoopControlINTEL = 5887, + 
SpvOpAliasDomainDeclINTEL = 5911, + SpvOpAliasScopeDeclINTEL = 5912, + SpvOpAliasScopeListDeclINTEL = 5913, + SpvOpFixedSqrtINTEL = 5923, + SpvOpFixedRecipINTEL = 5924, + SpvOpFixedRsqrtINTEL = 5925, + SpvOpFixedSinINTEL = 5926, + SpvOpFixedCosINTEL = 5927, + SpvOpFixedSinCosINTEL = 5928, + SpvOpFixedSinPiINTEL = 5929, + SpvOpFixedCosPiINTEL = 5930, + SpvOpFixedSinCosPiINTEL = 5931, + SpvOpFixedLogINTEL = 5932, + SpvOpFixedExpINTEL = 5933, + SpvOpPtrCastToCrossWorkgroupINTEL = 5934, + SpvOpCrossWorkgroupCastToPtrINTEL = 5938, + SpvOpReadPipeBlockingINTEL = 5946, + SpvOpWritePipeBlockingINTEL = 5947, + SpvOpFPGARegINTEL = 5949, + SpvOpRayQueryGetRayTMinKHR = 6016, + SpvOpRayQueryGetRayFlagsKHR = 6017, + SpvOpRayQueryGetIntersectionTKHR = 6018, + SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + SpvOpRayQueryGetIntersectionInstanceIdKHR = 6020, + SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + SpvOpRayQueryGetIntersectionGeometryIndexKHR = 6022, + SpvOpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + SpvOpRayQueryGetIntersectionBarycentricsKHR = 6024, + SpvOpRayQueryGetIntersectionFrontFaceKHR = 6025, + SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + SpvOpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + SpvOpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + SpvOpRayQueryGetWorldRayDirectionKHR = 6029, + SpvOpRayQueryGetWorldRayOriginKHR = 6030, + SpvOpRayQueryGetIntersectionObjectToWorldKHR = 6031, + SpvOpRayQueryGetIntersectionWorldToObjectKHR = 6032, + SpvOpAtomicFAddEXT = 6035, + SpvOpTypeBufferSurfaceINTEL = 6086, + SpvOpTypeStructContinuedINTEL = 6090, + SpvOpConstantCompositeContinuedINTEL = 6091, + SpvOpSpecConstantCompositeContinuedINTEL = 6092, + SpvOpMax = 0x7fffffff, +} SpvOp; + +#ifdef SPV_ENABLE_UTILITY_CODE +inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; 
+ case SpvOpNop: *hasResult = false; *hasResultType = false; break; + case SpvOpUndef: *hasResult = true; *hasResultType = true; break; + case SpvOpSourceContinued: *hasResult = false; *hasResultType = false; break; + case SpvOpSource: *hasResult = false; *hasResultType = false; break; + case SpvOpSourceExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpName: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberName: *hasResult = false; *hasResultType = false; break; + case SpvOpString: *hasResult = true; *hasResultType = false; break; + case SpvOpLine: *hasResult = false; *hasResultType = false; break; + case SpvOpExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpExtInstImport: *hasResult = true; *hasResultType = false; break; + case SpvOpExtInst: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryModel: *hasResult = false; *hasResultType = false; break; + case SpvOpEntryPoint: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionMode: *hasResult = false; *hasResultType = false; break; + case SpvOpCapability: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeVoid: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeBool: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeInt: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFloat: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeVector: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampler: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case 
SpvOpTypeStruct: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePointer: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFunction: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeQueue: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePipe: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case SpvOpConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantSampler: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantNull: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case SpvOpFunction: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case SpvOpFunctionCall: *hasResult = true; *hasResultType = true; break; + case SpvOpVariable: *hasResult = true; *hasResultType = true; break; + case 
SpvOpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case SpvOpLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpStore: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemory: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case SpvOpAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpArrayLength: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case SpvOpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyObject: *hasResult = true; *hasResultType = true; break; + case SpvOpTranspose: *hasResult = true; *hasResultType = true; break; + case SpvOpSampledImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleImplicitLod: *hasResult = true; 
*hasResultType = true; break; + case SpvOpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageRead: *hasResult = true; *hasResultType = true; break; + case SpvOpImageWrite: *hasResult = false; *hasResultType = false; break; + case SpvOpImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToU: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSToF: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToF: *hasResult = true; *hasResultType = true; break; + case SpvOpUConvert: *hasResult = true; *hasResultType = 
true; break; + case SpvOpSConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpFConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case SpvOpBitcast: *hasResult = true; *hasResultType = true; break; + case SpvOpSNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpFNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpISub: *hasResult = true; *hasResultType = true; break; + case SpvOpFSub: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpUDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpSDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpFDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpUMod: *hasResult = true; *hasResultType = true; break; + case SpvOpSRem: *hasResult = true; *hasResultType = true; break; + case SpvOpSMod: *hasResult = true; *hasResultType = true; break; + case SpvOpFRem: *hasResult = true; *hasResultType = true; break; + case SpvOpFMod: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesScalar: *hasResult = true; *hasResultType = 
true; break; + case SpvOpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpOuterProduct: *hasResult = true; *hasResultType = true; break; + case SpvOpDot: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddCarry: *hasResult = true; *hasResultType = true; break; + case SpvOpISubBorrow: *hasResult = true; *hasResultType = true; break; + case SpvOpUMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpSMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpAny: *hasResult = true; *hasResultType = true; break; + case SpvOpAll: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNan: *hasResult = true; *hasResultType = true; break; + case SpvOpIsInf: *hasResult = true; *hasResultType = true; break; + case SpvOpIsFinite: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNormal: *hasResult = true; *hasResultType = true; break; + case SpvOpSignBitSet: *hasResult = true; *hasResultType = true; break; + case SpvOpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case SpvOpOrdered: *hasResult = true; *hasResultType = true; break; + case SpvOpUnordered: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNot: *hasResult = true; *hasResultType = true; break; + case SpvOpSelect: *hasResult = true; *hasResultType = true; break; + case SpvOpIEqual: *hasResult = true; *hasResultType = true; break; + case 
SpvOpINotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseXor: *hasResult = true; *hasResultType = true; break; + 
case SpvOpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpNot: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitReverse: *hasResult = true; *hasResultType = true; break; + case SpvOpBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdx: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdy: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidth: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyFine: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicStore: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchangeWeak: *hasResult = true; 
*hasResultType = true; break; + case SpvOpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicISub: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicOr: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicXor: *hasResult = true; *hasResultType = true; break; + case SpvOpPhi: *hasResult = true; *hasResultType = true; break; + case SpvOpLoopMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpLabel: *hasResult = true; *hasResultType = false; break; + case SpvOpBranch: *hasResult = false; *hasResultType = false; break; + case SpvOpBranchConditional: *hasResult = false; *hasResultType = false; break; + case SpvOpSwitch: *hasResult = false; *hasResultType = false; break; + case SpvOpKill: *hasResult = false; *hasResultType = false; break; + case SpvOpReturn: *hasResult = false; *hasResultType = false; break; + case SpvOpReturnValue: *hasResult = false; *hasResultType = false; break; + case SpvOpUnreachable: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAll: *hasResult = true; 
*hasResultType = true; break; + case SpvOpGroupAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case SpvOpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case 
SpvOpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case SpvOpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case SpvOpRetainEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case SpvOpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case SpvOpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case SpvOpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case 
SpvOpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case SpvOpNoLine: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case SpvOpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case SpvOpSizeOf: *hasResult = true; *hasResultType = true; break; + case SpvOpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case SpvOpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case SpvOpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorateId: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case 
SpvOpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case 
SpvOpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrDiff: *hasResult = true; *hasResultType = true; break; + case SpvOpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpSDot: *hasResult = true; *hasResultType = true; break; + case SpvOpUDot: *hasResult = true; *hasResultType = true; break; + case SpvOpSUDot: *hasResult = true; *hasResultType = 
true; break; + case SpvOpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break; + case SpvOpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = 
false; *hasResultType = false; break; + case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case SpvOpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case SpvOpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case SpvOpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case SpvOpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case SpvOpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + 
case SpvOpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpExpectKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; 
*hasResultType = false; break; + case SpvOpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult 
= true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + 
case SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLTINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatACosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedCosINTEL: *hasResult = true; *hasResultType = 
true; break; + case SpvOpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = 
true; break; + case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + +#endif diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c new file mode 100644 index 00000000000..30205088b1b --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -0,0 +1,136 @@ +/* + * Copyright 2017 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#define COBJMACROS +#include "vkd3d.h" +#include "vkd3d_blob.h" +#include "vkd3d_debug.h" +#include "vkd3d_memory.h" + +struct vkd3d_blob +{ + ID3D10Blob ID3DBlob_iface; + LONG refcount; + + void *buffer; + SIZE_T size; +}; + +static struct vkd3d_blob *impl_from_ID3DBlob(ID3DBlob *iface) +{ + return CONTAINING_RECORD(iface, struct vkd3d_blob, ID3DBlob_iface); +} + +static HRESULT STDMETHODCALLTYPE vkd3d_blob_QueryInterface(ID3DBlob *iface, REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + if (IsEqualGUID(riid, &IID_ID3DBlob) + || IsEqualGUID(riid, &IID_IUnknown)) + { + ID3D10Blob_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE vkd3d_blob_AddRef(ID3DBlob *iface) +{ + struct vkd3d_blob *blob = impl_from_ID3DBlob(iface); + ULONG refcount = InterlockedIncrement(&blob->refcount); + + TRACE("%p increasing refcount to %u.\n", blob, refcount); + + return refcount; +} + +static ULONG STDMETHODCALLTYPE vkd3d_blob_Release(ID3DBlob *iface) +{ + struct vkd3d_blob *blob = impl_from_ID3DBlob(iface); + ULONG refcount = InterlockedDecrement(&blob->refcount); + + TRACE("%p decreasing refcount to %u.\n", blob, refcount); + + if (!refcount) + { + vkd3d_free(blob->buffer); + + vkd3d_free(blob); + } + + return refcount; +} + +static void * STDMETHODCALLTYPE vkd3d_blob_GetBufferPointer(ID3DBlob *iface) +{ + struct vkd3d_blob *blob = impl_from_ID3DBlob(iface); + + TRACE("iface %p.\n", iface); + + return blob->buffer; +} + +static SIZE_T STDMETHODCALLTYPE vkd3d_blob_GetBufferSize(ID3DBlob *iface) +{ + struct vkd3d_blob *blob = 
impl_from_ID3DBlob(iface); + + TRACE("iface %p.\n", iface); + + return blob->size; +} + +static const struct ID3D10BlobVtbl vkd3d_blob_vtbl = +{ + /* IUnknown methods */ + vkd3d_blob_QueryInterface, + vkd3d_blob_AddRef, + vkd3d_blob_Release, + /* ID3DBlob methods */ + vkd3d_blob_GetBufferPointer, + vkd3d_blob_GetBufferSize +}; + +static void vkd3d_blob_init(struct vkd3d_blob *blob, void *buffer, SIZE_T size) +{ + blob->ID3DBlob_iface.lpVtbl = &vkd3d_blob_vtbl; + blob->refcount = 1; + + blob->buffer = buffer; + blob->size = size; +} + +HRESULT vkd3d_blob_create(void *buffer, SIZE_T size, ID3D10Blob **blob) +{ + struct vkd3d_blob *object; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + vkd3d_blob_init(object, buffer, size); + + TRACE("Created blob object %p.\n", object); + + *blob = &object->ID3DBlob_iface; + + return S_OK; +} diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c new file mode 100644 index 00000000000..4868f3fbaa8 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -0,0 +1,347 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_debug.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VKD3D_DEBUG_BUFFER_COUNT 64 +#define VKD3D_DEBUG_BUFFER_SIZE 512 + +extern const char *vkd3d_dbg_env_name; + +static const char *debug_level_names[] = +{ + /* VKD3D_DBG_LEVEL_NONE */ "none", + /* VKD3D_DBG_LEVEL_ERR */ "err", + /* VKD3D_DBG_LEVEL_FIXME */ "fixme", + /* VKD3D_DBG_LEVEL_WARN */ "warn", + /* VKD3D_DBG_LEVEL_TRACE */ "trace", +}; + +enum vkd3d_dbg_level vkd3d_dbg_get_level(void) +{ + static unsigned int level = ~0u; + const char *vkd3d_debug; + unsigned int i; + + if (level != ~0u) + return level; + + if (!(vkd3d_debug = getenv(vkd3d_dbg_env_name))) + vkd3d_debug = ""; + + for (i = 0; i < ARRAY_SIZE(debug_level_names); ++i) + { + if (!strcmp(debug_level_names[i], vkd3d_debug)) + { + level = i; + return level; + } + } + + /* Default debug level. */ + level = VKD3D_DBG_LEVEL_FIXME; + return level; +} + +void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) 
+{ + va_list args; + + if (vkd3d_dbg_get_level() < level) + return; + + assert(level < ARRAY_SIZE(debug_level_names)); + + fprintf(stderr, "%s:%s: ", debug_level_names[level], function); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); +} + +static char *get_buffer(void) +{ + static char buffers[VKD3D_DEBUG_BUFFER_COUNT][VKD3D_DEBUG_BUFFER_SIZE]; + static LONG buffer_index; + LONG current_index; + + current_index = InterlockedIncrement(&buffer_index) % ARRAY_SIZE(buffers); + return buffers[current_index]; +} + +const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args) +{ + char *buffer; + + buffer = get_buffer(); + vsnprintf(buffer, VKD3D_DEBUG_BUFFER_SIZE, fmt, args); + buffer[VKD3D_DEBUG_BUFFER_SIZE - 1] = '\0'; + return buffer; +} + +const char *vkd3d_dbg_sprintf(const char *fmt, ...) +{ + const char *buffer; + va_list args; + + va_start(args, fmt); + buffer = vkd3d_dbg_vsprintf(fmt, args); + va_end(args); + return buffer; +} + +static int get_escape_char(int c) +{ + switch (c) + { + case '"': + case '\\': + return c; + case '\t': + return 't'; + case '\n': + return 'n'; + case '\r': + return 'r'; + default: + return 0; + } +} + +const char *debugstr_an(const char *str, size_t n) +{ + char *buffer, *ptr; + int escape_char; + char c; + + if (!str) + return "(null)"; + if (n == SIZE_MAX) + n = strlen(str); + + ptr = buffer = get_buffer(); + + *ptr++ = '"'; + while (n-- && ptr <= buffer + VKD3D_DEBUG_BUFFER_SIZE - 8) + { + c = *str++; + + if ((escape_char = get_escape_char(c))) + { + *ptr++ = '\\'; + *ptr++ = escape_char; + continue; + } + + if (isprint(c)) + { + *ptr++ = c; + } + else + { + *ptr++ = '\\'; + sprintf(ptr, "%02x", c); + ptr += 2; + } + } + *ptr++ = '"'; + + if (++n) + { + *ptr++ = '.'; + *ptr++ = '.'; + *ptr++ = '.'; + } + *ptr = '\0'; + + return buffer; +} + +const char *debugstr_a(const char *str) +{ + return debugstr_an(str, SIZE_MAX); +} + +static const char *debugstr_w16(const uint16_t *wstr) +{ + char *buffer, *ptr; + 
uint16_t c; + + if (!wstr) + return "(null)"; + + ptr = buffer = get_buffer(); + + *ptr++ = '"'; + while ((c = *wstr++) && ptr <= buffer + VKD3D_DEBUG_BUFFER_SIZE - 10) + { + int escape_char = get_escape_char(c); + + if (escape_char) + { + *ptr++ = '\\'; + *ptr++ = escape_char; + continue; + } + + if (isprint(c)) + { + *ptr++ = c; + } + else + { + *ptr++ = '\\'; + sprintf(ptr, "%04x", c); + ptr += 4; + } + } + *ptr++ = '"'; + + if (c) + { + *ptr++ = '.'; + *ptr++ = '.'; + *ptr++ = '.'; + } + *ptr = '\0'; + + return buffer; +} + +static const char *debugstr_w32(const uint32_t *wstr) +{ + char *buffer, *ptr; + uint32_t c; + + if (!wstr) + return "(null)"; + + ptr = buffer = get_buffer(); + + *ptr++ = '"'; + while ((c = *wstr++) && ptr <= buffer + VKD3D_DEBUG_BUFFER_SIZE - 10) + { + int escape_char = get_escape_char(c); + + if (escape_char) + { + *ptr++ = '\\'; + *ptr++ = escape_char; + continue; + } + + if (isprint(c)) + { + *ptr++ = c; + } + else + { + *ptr++ = '\\'; + sprintf(ptr, "%04x", c); + ptr += 4; + } + } + *ptr++ = '"'; + + if (c) + { + *ptr++ = '.'; + *ptr++ = '.'; + *ptr++ = '.'; + } + *ptr = '\0'; + + return buffer; +} + +const char *debugstr_w(const WCHAR *wstr, size_t wchar_size) +{ + if (wchar_size == 2) + return debugstr_w16((const uint16_t *)wstr); + return debugstr_w32((const uint32_t *)wstr); +} + +unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value) +{ + const char *value = getenv(name); + unsigned long r; + char *end_ptr; + + if (value) + { + errno = 0; + r = strtoul(value, &end_ptr, 0); + if (!errno && end_ptr != value) + return min(r, UINT_MAX); + } + + return default_value; +} + +static bool is_option_separator(char c) +{ + return c == ',' || c == ';' || c == '\0'; +} + +bool vkd3d_debug_list_has_member(const char *string, const char *member) +{ + char prev_char, next_char; + const char *p; + + p = string; + while (p) + { + if ((p = strstr(p, member))) + { + prev_char = p > string ? 
p[-1] : 0; + p += strlen(member); + next_char = *p; + + if (is_option_separator(prev_char) && is_option_separator(next_char)) + return true; + } + } + + return false; +} + +uint64_t vkd3d_parse_debug_options(const char *string, + const struct vkd3d_debug_option *options, unsigned int option_count) +{ + uint64_t flags = 0; + unsigned int i; + + for (i = 0; i < option_count; ++i) + { + const struct vkd3d_debug_option *opt = &options[i]; + + if (vkd3d_debug_list_has_member(string, opt->name)) + flags |= opt->flag; + } + + return flags; +} diff --git a/libs/vkd3d/libs/vkd3d-common/error.c b/libs/vkd3d/libs/vkd3d-common/error.c new file mode 100644 index 00000000000..81c1fd97a58 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-common/error.c @@ -0,0 +1,43 @@ +/* + * Copyright 2018 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_common.h" +#include "vkd3d_debug.h" + +HRESULT hresult_from_vkd3d_result(int vkd3d_result) +{ + switch (vkd3d_result) + { + case VKD3D_OK: + return S_OK; + case VKD3D_ERROR_INVALID_SHADER: + WARN("Invalid shader bytecode.\n"); + /* fall-through */ + case VKD3D_ERROR: + return E_FAIL; + case VKD3D_ERROR_OUT_OF_MEMORY: + return E_OUTOFMEMORY; + case VKD3D_ERROR_INVALID_ARGUMENT: + return E_INVALIDARG; + case VKD3D_ERROR_NOT_IMPLEMENTED: + return E_NOTIMPL; + default: + FIXME("Unhandled vkd3d result %d.\n", vkd3d_result); + return E_FAIL; + } +} diff --git a/libs/vkd3d/libs/vkd3d-common/memory.c b/libs/vkd3d/libs/vkd3d-common/memory.c new file mode 100644 index 00000000000..2bf8947e090 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-common/memory.c @@ -0,0 +1,48 @@ +/* + * Copyright 2016 Henri Verbeet for CodeWeavers + * Copyright 2017 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_memory.h" + +bool vkd3d_array_reserve(void **elements, size_t *capacity, size_t element_count, size_t element_size) +{ + size_t new_capacity, max_capacity; + void *new_elements; + + if (element_count <= *capacity) + return true; + + max_capacity = ~(size_t)0 / element_size; + if (max_capacity < element_count) + return false; + + new_capacity = max(*capacity, 4); + while (new_capacity < element_count && new_capacity <= max_capacity / 2) + new_capacity *= 2; + + if (new_capacity < element_count) + new_capacity = element_count; + + if (!(new_elements = vkd3d_realloc(*elements, new_capacity * element_size))) + return false; + + *elements = new_elements; + *capacity = new_capacity; + + return true; +} diff --git a/libs/vkd3d/libs/vkd3d-common/utf8.c b/libs/vkd3d/libs/vkd3d-common/utf8.c new file mode 100644 index 00000000000..8bf4eb05e4f --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-common/utf8.c @@ -0,0 +1,164 @@ +/* + * Copyright 2000 Alexandre Julliard + * Copyright 2019 Zhiyi Zhang for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_memory.h" +#include "vkd3d_utf8.h" + +#include + +static size_t vkd3d_utf8_len(uint32_t c) +{ + /* 0x00-0x7f: 1 byte */ + if (c < 0x80) + return 1; + /* 0x80-0x7ff: 2 bytes */ + if (c < 0x800) + return 2; + /* 0x800-0xffff: 3 bytes */ + if (c < 0x10000) + return 3; + /* 0x10000-0x10ffff: 4 bytes */ + return 4; +} + +static void vkd3d_utf8_append(char **dst, uint32_t c) +{ + char *d = *dst; + + /* 0x00-0x7f: 1 byte */ + if (c < 0x80) + { + d[0] = c; + *dst += 1; + return; + } + + /* 0x80-0x7ff: 2 bytes */ + if (c < 0x800) + { + d[1] = 0x80 | (c & 0x3f); + c >>= 6; + d[0] = 0xc0 | c; + *dst += 2; + return; + } + + /* 0x800-0xffff: 3 bytes */ + if (c < 0x10000) /* 0x800-0xffff: 3 bytes */ + { + d[2] = 0x80 | (c & 0x3f); + c >>= 6; + d[1] = 0x80 | (c & 0x3f); + c >>= 6; + d[0] = 0xe0 | c; + *dst += 3; + return; + } + + /* 0x10000-0x10ffff: 4 bytes */ + d[3] = 0x80 | (c & 0x3f); + c >>= 6; + d[2] = 0x80 | (c & 0x3f); + c >>= 6; + d[1] = 0x80 | (c & 0x3f); + c >>= 6; + d[0] = 0xf0 | c; + *dst += 4; +} + +static uint32_t vkd3d_utf16_read(const uint16_t **src) +{ + const uint16_t *s = *src; + + if (s[0] < 0xd800 || s[0] > 0xdfff) /* Not a surrogate pair. */ + { + *src += 1; + return s[0]; + } + + if (s[0] > 0xdbff /* Invalid high surrogate. */ + || s[1] < 0xdc00 || s[1] > 0xdfff) /* Invalid low surrogate. 
*/ + { + *src += 1; + return 0; + } + + *src += 2; + return 0x10000 + ((s[0] & 0x3ff) << 10) + (s[1] & 0x3ff); +} + +static char *vkd3d_strdup_w16_utf8(const uint16_t *wstr) +{ + const uint16_t *src = wstr; + size_t dst_size = 0; + char *dst, *utf8; + uint32_t c; + + while (*src) + { + if (!(c = vkd3d_utf16_read(&src))) + continue; + dst_size += vkd3d_utf8_len(c); + } + ++dst_size; + + if (!(dst = vkd3d_malloc(dst_size))) + return NULL; + + utf8 = dst; + src = wstr; + while (*src) + { + if (!(c = vkd3d_utf16_read(&src))) + continue; + vkd3d_utf8_append(&utf8, c); + } + *utf8 = 0; + + return dst; +} + +static char *vkd3d_strdup_w32_utf8(const uint32_t *wstr) +{ + const uint32_t *src = wstr; + size_t dst_size = 0; + char *dst, *utf8; + + while (*src) + dst_size += vkd3d_utf8_len(*src++); + ++dst_size; + + if (!(dst = vkd3d_malloc(dst_size))) + return NULL; + + utf8 = dst; + src = wstr; + while (*src) + vkd3d_utf8_append(&utf8, *src++); + *utf8 = 0; + + return dst; +} + +char *vkd3d_strdup_w_utf8(const WCHAR *wstr, size_t wchar_size) +{ + if (wchar_size == 2) + return vkd3d_strdup_w16_utf8((const uint16_t *)wstr); + return vkd3d_strdup_w32_utf8((const uint32_t *)wstr); +} diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c new file mode 100644 index 00000000000..0910729a0e9 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c @@ -0,0 +1,300 @@ +/* + * Copyright (C) 2001 Nikos Mavroyanopoulos + * Copyright (C) 2004 Hans Leidekker + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* + * This code implements the MD5 message-digest algorithm. + * It is based on code in the public domain written by Colin + * Plumb in 1993. The algorithm is due to Ron Rivest. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * md5_ctx structure, pass it to md5_init, call md5_update as + * needed on buffers full of bytes, and then call md5_final, which + * will fill a supplied 16-byte array with the digest. + */ + +#include "vkd3d_shader_private.h" + +#define DXBC_CHECKSUM_BLOCK_SIZE 64 + +STATIC_ASSERT(sizeof(unsigned int) == 4); + +struct md5_ctx +{ + unsigned int i[2]; + unsigned int buf[4]; + unsigned char in[DXBC_CHECKSUM_BLOCK_SIZE]; + unsigned char digest[16]; +}; + +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) \ + (w += f(x, y, z) + data, w = w << s | w >> (32 - s), w += x) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. md5_update blocks + * the data and converts bytes into longwords for this routine. 
+ */ +static void md5_transform(unsigned int buf[4], const unsigned int in[16]) +{ + unsigned int a, b, c, d; + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 
0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +/* + * Note: this code is harmless on little-endian machines. + */ +static void byte_reverse(unsigned char *buf, unsigned longs) +{ + unsigned int t; + + do + { + t = vkd3d_make_u32(vkd3d_make_u16(buf[0], buf[1]), vkd3d_make_u16(buf[2], buf[3])); + *(unsigned int *)buf = t; + buf += 4; + } while (--longs); +} + +/* + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. 
+ */ +static void md5_init(struct md5_ctx *ctx) +{ + ctx->buf[0] = 0x67452301; + ctx->buf[1] = 0xefcdab89; + ctx->buf[2] = 0x98badcfe; + ctx->buf[3] = 0x10325476; + + ctx->i[0] = ctx->i[1] = 0; +} + +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. + */ +static void md5_update(struct md5_ctx *ctx, const unsigned char *buf, unsigned int len) +{ + unsigned int t; + + /* Update bitcount */ + t = ctx->i[0]; + + if ((ctx->i[0] = t + (len << 3)) < t) + ctx->i[1]++; /* Carry from low to high */ + + ctx->i[1] += len >> 29; + t = (t >> 3) & 0x3f; + + /* Handle any leading odd-sized chunks */ + if (t) + { + unsigned char *p = (unsigned char *)ctx->in + t; + t = DXBC_CHECKSUM_BLOCK_SIZE - t; + + if (len < t) + { + memcpy(p, buf, len); + return; + } + + memcpy(p, buf, t); + byte_reverse(ctx->in, 16); + + md5_transform(ctx->buf, (unsigned int *)ctx->in); + + buf += t; + len -= t; + } + + /* Process data in 64-byte chunks */ + while (len >= DXBC_CHECKSUM_BLOCK_SIZE) + { + memcpy(ctx->in, buf, DXBC_CHECKSUM_BLOCK_SIZE); + byte_reverse(ctx->in, 16); + + md5_transform(ctx->buf, (unsigned int *)ctx->in); + + buf += DXBC_CHECKSUM_BLOCK_SIZE; + len -= DXBC_CHECKSUM_BLOCK_SIZE; + } + + /* Handle any remaining bytes of data. */ + memcpy(ctx->in, buf, len); +} + +static void dxbc_checksum_final(struct md5_ctx *ctx) +{ + unsigned int padding; + unsigned int length; + unsigned int count; + unsigned char *p; + + /* Compute number of bytes mod 64 */ + count = (ctx->i[0] >> 3) & 0x3F; + + /* Set the first char of padding to 0x80. 
This is safe since there is + always at least one byte free */ + p = ctx->in + count; + *p++ = 0x80; + ++count; + + /* Bytes of padding needed to make 64 bytes */ + padding = DXBC_CHECKSUM_BLOCK_SIZE - count; + + /* Pad out to 56 mod 64 */ + if (padding < 8) + { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset(p, 0, padding); + byte_reverse(ctx->in, 16); + md5_transform(ctx->buf, (unsigned int *)ctx->in); + + /* Now fill the next block */ + memset(ctx->in, 0, DXBC_CHECKSUM_BLOCK_SIZE); + } + else + { + /* Make place for bitcount at the beginning of the block */ + memmove(&ctx->in[4], ctx->in, count); + + /* Pad block to 60 bytes */ + memset(p + 4, 0, padding - 4); + } + + /* Append length in bits and transform */ + length = ctx->i[0]; + memcpy(&ctx->in[0], &length, sizeof(length)); + byte_reverse(&ctx->in[4], 14); + length = ctx->i[0] >> 2 | 0x1; + memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length)); + + md5_transform(ctx->buf, (unsigned int *)ctx->in); + byte_reverse((unsigned char *)ctx->buf, 4); + memcpy(ctx->digest, ctx->buf, 16); +} + +#define DXBC_CHECKSUM_SKIP_BYTE_COUNT 20 + +void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]) +{ + const uint8_t *ptr = dxbc; + struct md5_ctx ctx; + + assert(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); + ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; + size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; + + md5_init(&ctx); + md5_update(&ctx, ptr, size); + dxbc_checksum_final(&ctx); + + memcpy(checksum, ctx.digest, sizeof(ctx.digest)); +} diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c new file mode 100644 index 00000000000..c5518752a2b --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -0,0 +1,956 @@ +/* + * Copyright 2002-2003 Jason Edmeades + * Copyright 2002-2003 Raphael Junqueira + * Copyright 2004 Christian Costa + * Copyright 2005 Oliver Stieber + * Copyright 2006 Ivan Gyurdiev + * Copyright 2007-2008 Stefan Dösinger for 
CodeWeavers + * Copyright 2009, 2021 Henri Verbeet for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +#define VKD3D_SM1_VS 0xfffeu +#define VKD3D_SM1_PS 0xffffu + +#define VKD3D_SM1_DCL_USAGE_SHIFT 0u +#define VKD3D_SM1_DCL_USAGE_MASK (0xfu << VKD3D_SM1_DCL_USAGE_SHIFT) +#define VKD3D_SM1_DCL_USAGE_INDEX_SHIFT 16u +#define VKD3D_SM1_DCL_USAGE_INDEX_MASK (0xfu << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT) + +#define VKD3D_SM1_RESOURCE_TYPE_SHIFT 27u +#define VKD3D_SM1_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM1_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM1_OPCODE_MASK 0x0000ffffu + +#define VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT 16u +#define VKD3D_SM1_INSTRUCTION_FLAGS_MASK (0xffu << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT) + +#define VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT 24u +#define VKD3D_SM1_INSTRUCTION_LENGTH_MASK (0xfu << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT) + +#define VKD3D_SM1_COISSUE (0x1u << 30u) + +#define VKD3D_SM1_COMMENT_SIZE_SHIFT 16u +#define VKD3D_SM1_COMMENT_SIZE_MASK (0x7fffu << VKD3D_SM1_COMMENT_SIZE_SHIFT) + +#define VKD3D_SM1_INSTRUCTION_PREDICATED (0x1u << 28u) + +#define VKD3D_SM1_INSTRUCTION_PARAMETER (0x1u << 31u) + +#define VKD3D_SM1_REGISTER_NUMBER_MASK 0x000007ffu + +#define VKD3D_SM1_REGISTER_TYPE_SHIFT 28u +#define 
VKD3D_SM1_REGISTER_TYPE_MASK (0x7u << VKD3D_SM1_REGISTER_TYPE_SHIFT) +#define VKD3D_SM1_REGISTER_TYPE_SHIFT2 8u +#define VKD3D_SM1_REGISTER_TYPE_MASK2 (0x18u << VKD3D_SM1_REGISTER_TYPE_SHIFT2) + +#define VKD3D_SM1_ADDRESS_MODE_SHIFT 13u +#define VKD3D_SM1_ADDRESS_MODE_MASK (0x1u << VKD3D_SM1_ADDRESS_MODE_SHIFT) + +#define VKD3D_SM1_DST_MODIFIER_SHIFT 20u +#define VKD3D_SM1_DST_MODIFIER_MASK (0xfu << VKD3D_SM1_DST_MODIFIER_SHIFT) + +#define VKD3D_SM1_DSTSHIFT_SHIFT 24u +#define VKD3D_SM1_DSTSHIFT_MASK (0xfu << VKD3D_SM1_DSTSHIFT_SHIFT) + +#define VKD3D_SM1_WRITEMASK_SHIFT 16u +#define VKD3D_SM1_WRITEMASK_MASK (0xfu << VKD3D_SM1_WRITEMASK_SHIFT) + +#define VKD3D_SM1_SWIZZLE_SHIFT 16u +#define VKD3D_SM1_SWIZZLE_MASK (0xffu << VKD3D_SM1_SWIZZLE_SHIFT) +#define VKD3D_SM1_SWIZZLE_DEFAULT (0u | (1u << 2) | (2u << 4) | (3u << 6)) + +#define VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(idx) (2u * (idx)) +#define VKD3D_SM1_SWIZZLE_COMPONENT_MASK(idx) (0x3u << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(idx)) + +#define VKD3D_SM1_SRC_MODIFIER_SHIFT 24u +#define VKD3D_SM1_SRC_MODIFIER_MASK (0xfu << VKD3D_SM1_SRC_MODIFIER_SHIFT) + +#define VKD3D_SM1_END 0x0000ffffu + +#define VKD3D_SM1_VERSION_MAJOR(version) (((version) >> 8u) & 0xffu) +#define VKD3D_SM1_VERSION_MINOR(version) (((version) >> 0u) & 0xffu) + +enum vkd3d_sm1_address_mode_type +{ + VKD3D_SM1_ADDRESS_MODE_ABSOLUTE = 0x0, + VKD3D_SM1_ADDRESS_MODE_RELATIVE = 0x1, +}; + +enum vkd3d_sm1_resource_type +{ + VKD3D_SM1_RESOURCE_UNKNOWN = 0x0, + VKD3D_SM1_RESOURCE_TEXTURE_1D = 0x1, + VKD3D_SM1_RESOURCE_TEXTURE_2D = 0x2, + VKD3D_SM1_RESOURCE_TEXTURE_CUBE = 0x3, + VKD3D_SM1_RESOURCE_TEXTURE_3D = 0x4, +}; + +enum vkd3d_sm1_opcode +{ + VKD3D_SM1_OP_NOP = 0x00, + VKD3D_SM1_OP_MOV = 0x01, + VKD3D_SM1_OP_ADD = 0x02, + VKD3D_SM1_OP_SUB = 0x03, + VKD3D_SM1_OP_MAD = 0x04, + VKD3D_SM1_OP_MUL = 0x05, + VKD3D_SM1_OP_RCP = 0x06, + VKD3D_SM1_OP_RSQ = 0x07, + VKD3D_SM1_OP_DP3 = 0x08, + VKD3D_SM1_OP_DP4 = 0x09, + VKD3D_SM1_OP_MIN = 0x0a, + VKD3D_SM1_OP_MAX = 
0x0b, + VKD3D_SM1_OP_SLT = 0x0c, + VKD3D_SM1_OP_SGE = 0x0d, + VKD3D_SM1_OP_EXP = 0x0e, + VKD3D_SM1_OP_LOG = 0x0f, + VKD3D_SM1_OP_LIT = 0x10, + VKD3D_SM1_OP_DST = 0x11, + VKD3D_SM1_OP_LRP = 0x12, + VKD3D_SM1_OP_FRC = 0x13, + VKD3D_SM1_OP_M4x4 = 0x14, + VKD3D_SM1_OP_M4x3 = 0x15, + VKD3D_SM1_OP_M3x4 = 0x16, + VKD3D_SM1_OP_M3x3 = 0x17, + VKD3D_SM1_OP_M3x2 = 0x18, + VKD3D_SM1_OP_CALL = 0x19, + VKD3D_SM1_OP_CALLNZ = 0x1a, + VKD3D_SM1_OP_LOOP = 0x1b, + VKD3D_SM1_OP_RET = 0x1c, + VKD3D_SM1_OP_ENDLOOP = 0x1d, + VKD3D_SM1_OP_LABEL = 0x1e, + VKD3D_SM1_OP_DCL = 0x1f, + VKD3D_SM1_OP_POW = 0x20, + VKD3D_SM1_OP_CRS = 0x21, + VKD3D_SM1_OP_SGN = 0x22, + VKD3D_SM1_OP_ABS = 0x23, + VKD3D_SM1_OP_NRM = 0x24, + VKD3D_SM1_OP_SINCOS = 0x25, + VKD3D_SM1_OP_REP = 0x26, + VKD3D_SM1_OP_ENDREP = 0x27, + VKD3D_SM1_OP_IF = 0x28, + VKD3D_SM1_OP_IFC = 0x29, + VKD3D_SM1_OP_ELSE = 0x2a, + VKD3D_SM1_OP_ENDIF = 0x2b, + VKD3D_SM1_OP_BREAK = 0x2c, + VKD3D_SM1_OP_BREAKC = 0x2d, + VKD3D_SM1_OP_MOVA = 0x2e, + VKD3D_SM1_OP_DEFB = 0x2f, + VKD3D_SM1_OP_DEFI = 0x30, + + VKD3D_SM1_OP_TEXCOORD = 0x40, + VKD3D_SM1_OP_TEXKILL = 0x41, + VKD3D_SM1_OP_TEX = 0x42, + VKD3D_SM1_OP_TEXBEM = 0x43, + VKD3D_SM1_OP_TEXBEML = 0x44, + VKD3D_SM1_OP_TEXREG2AR = 0x45, + VKD3D_SM1_OP_TEXREG2GB = 0x46, + VKD3D_SM1_OP_TEXM3x2PAD = 0x47, + VKD3D_SM1_OP_TEXM3x2TEX = 0x48, + VKD3D_SM1_OP_TEXM3x3PAD = 0x49, + VKD3D_SM1_OP_TEXM3x3TEX = 0x4a, + VKD3D_SM1_OP_TEXM3x3DIFF = 0x4b, + VKD3D_SM1_OP_TEXM3x3SPEC = 0x4c, + VKD3D_SM1_OP_TEXM3x3VSPEC = 0x4d, + VKD3D_SM1_OP_EXPP = 0x4e, + VKD3D_SM1_OP_LOGP = 0x4f, + VKD3D_SM1_OP_CND = 0x50, + VKD3D_SM1_OP_DEF = 0x51, + VKD3D_SM1_OP_TEXREG2RGB = 0x52, + VKD3D_SM1_OP_TEXDP3TEX = 0x53, + VKD3D_SM1_OP_TEXM3x2DEPTH = 0x54, + VKD3D_SM1_OP_TEXDP3 = 0x55, + VKD3D_SM1_OP_TEXM3x3 = 0x56, + VKD3D_SM1_OP_TEXDEPTH = 0x57, + VKD3D_SM1_OP_CMP = 0x58, + VKD3D_SM1_OP_BEM = 0x59, + VKD3D_SM1_OP_DP2ADD = 0x5a, + VKD3D_SM1_OP_DSX = 0x5b, + VKD3D_SM1_OP_DSY = 0x5c, + VKD3D_SM1_OP_TEXLDD = 0x5d, + VKD3D_SM1_OP_SETP = 0x5e, 
+ VKD3D_SM1_OP_TEXLDL = 0x5f, + VKD3D_SM1_OP_BREAKP = 0x60, + + VKD3D_SM1_OP_PHASE = 0xfffd, + VKD3D_SM1_OP_COMMENT = 0xfffe, + VKD3D_SM1_OP_END = 0Xffff, +}; + +struct vkd3d_sm1_opcode_info +{ + enum vkd3d_sm1_opcode sm1_opcode; + unsigned int dst_count; + unsigned int src_count; + enum vkd3d_shader_opcode vkd3d_opcode; + struct + { + unsigned int major, minor; + } min_version, max_version; +}; + +struct vkd3d_shader_sm1_parser +{ + const struct vkd3d_sm1_opcode_info *opcode_table; + const uint32_t *start, *end; + bool abort; + + struct vkd3d_shader_src_param src_rel_addr[4]; + struct vkd3d_shader_src_param pred_rel_addr; + struct vkd3d_shader_src_param dst_rel_addr; + struct vkd3d_shader_src_param src_param[4]; + struct vkd3d_shader_src_param pred_param; + struct vkd3d_shader_dst_param dst_param; + + struct vkd3d_shader_parser p; +}; + +/* This table is not order or position dependent. */ +static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = +{ + /* Arithmetic */ + {VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP}, + {VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV}, + {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD}, + {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, + {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, + {VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL}, + {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP}, + {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ}, + {VKD3D_SM1_OP_DP3, 1, 2, VKD3DSIH_DP3}, + {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4}, + {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN}, + {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, + {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, + {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, + {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, + {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, + {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, + {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, + {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, + {VKD3D_SM1_OP_LIT, 1, 1, VKD3DSIH_LIT}, + {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, + {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, + 
{VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, + {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, + {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, + {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, { 2, 1}}, + {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM,}, + {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, + {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, + /* Matrix */ + {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, + {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, + {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4}, + {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3}, + {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2}, + /* Declarations */ + {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, + /* Constant definitions */ + {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, + {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, + /* Control flow */ + {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, + {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}, {~0u, ~0u}}, + + {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, + {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, + {0, 0, 0, 
VKD3DSIH_INVALID}, +}; + +static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = +{ + /* Arithmetic */ + {VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP}, + {VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV}, + {VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD}, + {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, + {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, + {VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL}, + {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP}, + {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ}, + {VKD3D_SM1_OP_DP3, 1, 2, VKD3DSIH_DP3}, + {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4}, + {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN}, + {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, + {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, + {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, + {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, + {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, + {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, + {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, + {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, + {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, + {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, + {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, + {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, { 1, 4}}, + {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}, { 3, 0}}, + {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, + {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, + {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM}, + {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, + {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}, {~0u, ~0u}}, + /* Matrix */ + {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, + {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, + {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4}, + {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3}, + {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2}, + /* Declarations */ + {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, + /* Constant definitions */ + {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, + {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, + /* Control flow */ + {VKD3D_SM1_OP_REP, 0, 1, 
VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, + {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}, {~0u, ~0u}}, + /* Texture */ + {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1 ,4}, { 1, 4}}, + {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1 ,0}, { 3, 0}}, + {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, { 1, 4}}, + {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, { 1, 3}}, + {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, { 0, 0}}, + 
{VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, { 1, 3}}, + {VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, { 1, 3}}, + {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, { 1, 3}}, + {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, { 1, 3}}, + {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, { 1, 3}}, + {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, { 1, 4}}, + {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, { 1, 4}}, + {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, + {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE}, + {0, 0, 0, VKD3DSIH_INVALID}, +}; + +static const enum vkd3d_shader_resource_type resource_type_table[] = +{ + /* VKD3D_SM1_RESOURCE_UNKNOWN */ VKD3D_SHADER_RESOURCE_NONE, + /* VKD3D_SM1_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, + /* VKD3D_SM1_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, + /* VKD3D_SM1_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, + /* VKD3D_SM1_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, +}; + +static struct vkd3d_shader_sm1_parser *vkd3d_shader_sm1_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct vkd3d_shader_sm1_parser, p); +} + +static uint32_t read_u32(const uint32_t **ptr) +{ + return *(*ptr)++; +} + +static bool shader_ver_ge(const struct vkd3d_shader_version *v, unsigned int major, unsigned int minor) +{ + return v->major > major || (v->major == major && v->minor >= minor); +} + +static bool shader_ver_le(const struct vkd3d_shader_version *v, unsigned int major, unsigned int minor) +{ 
+ return v->major < major || (v->major == major && v->minor <= minor); +} + +static bool has_relative_address(uint32_t param) +{ + enum vkd3d_sm1_address_mode_type address_mode; + + address_mode = (param & VKD3D_SM1_ADDRESS_MODE_MASK) >> VKD3D_SM1_ADDRESS_MODE_SHIFT; + + return address_mode == VKD3D_SM1_ADDRESS_MODE_RELATIVE; +} + +static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info( + const struct vkd3d_shader_sm1_parser *sm1, enum vkd3d_sm1_opcode opcode) +{ + const struct vkd3d_sm1_opcode_info *info; + unsigned int i = 0; + + for (;;) + { + info = &sm1->opcode_table[i++]; + if (info->vkd3d_opcode == VKD3DSIH_INVALID) + return NULL; + + if (opcode == info->sm1_opcode + && shader_ver_ge(&sm1->p.shader_version, info->min_version.major, info->min_version.minor) + && (shader_ver_le(&sm1->p.shader_version, info->max_version.major, info->max_version.minor) + || !info->max_version.major)) + return info; + } +} + +static unsigned int shader_sm1_get_swizzle_component(uint32_t swizzle, unsigned int idx) +{ + return (swizzle & VKD3D_SM1_SWIZZLE_COMPONENT_MASK(idx)) >> VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(idx); +} + +static uint32_t swizzle_from_sm1(uint32_t swizzle) +{ + return vkd3d_shader_create_swizzle(shader_sm1_get_swizzle_component(swizzle, 0), + shader_sm1_get_swizzle_component(swizzle, 1), + shader_sm1_get_swizzle_component(swizzle, 2), + shader_sm1_get_swizzle_component(swizzle, 3)); +} + +static void shader_sm1_parse_src_param(uint32_t param, const struct vkd3d_shader_src_param *rel_addr, + struct vkd3d_shader_src_param *src) +{ + src->reg.type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) + | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); + src->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + src->reg.non_uniform = false; + src->reg.data_type = VKD3D_DATA_FLOAT; + src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; + src->reg.idx[0].rel_addr = rel_addr; + 
src->reg.idx[1].offset = ~0u; + src->reg.idx[1].rel_addr = NULL; + src->reg.idx[2].offset = ~0u; + src->reg.idx[2].rel_addr = NULL; + src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); + src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; +} + +static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader_src_param *rel_addr, + struct vkd3d_shader_dst_param *dst) +{ + dst->reg.type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) + | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); + dst->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + dst->reg.non_uniform = false; + dst->reg.data_type = VKD3D_DATA_FLOAT; + dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; + dst->reg.idx[0].rel_addr = rel_addr; + dst->reg.idx[1].offset = ~0u; + dst->reg.idx[1].rel_addr = NULL; + dst->reg.idx[2].offset = ~0u; + dst->reg.idx[2].rel_addr = NULL; + dst->write_mask = (param & VKD3D_SM1_WRITEMASK_MASK) >> VKD3D_SM1_WRITEMASK_SHIFT; + dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; + dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; +} + +/* Read a parameter token from the input stream, and possibly a relative + * addressing token. 
*/ +static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, + const uint32_t **ptr, uint32_t *token, uint32_t *addr_token) +{ + if (*ptr >= sm1->end) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF, + "Attempted to read a parameter token, but no more tokens are remaining."); + sm1->abort = true; + *token = 0; + return; + } + *token = read_u32(ptr); + if (!has_relative_address(*token)) + return; + + /* PS >= 3.0 have relative addressing (with token) + * VS >= 2.0 have relative addressing (with token) + * VS >= 1.0 < 2.0 have relative addressing (without token) + * The version check below should work in general. */ + if (sm1->p.shader_version.major < 2) + { + *addr_token = (1u << 31) + | ((VKD3DSPR_ADDR << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2) + | ((VKD3DSPR_ADDR << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) + | (VKD3D_SM1_SWIZZLE_DEFAULT << VKD3D_SM1_SWIZZLE_SHIFT); + return; + } + + if (*ptr >= sm1->end) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF, + "Attempted to read an indirect addressing token, but no more tokens are remaining."); + sm1->abort = true; + *addr_token = 0; + return; + } + *addr_token = read_u32(ptr); +} + +/* Skip the parameter tokens for an opcode. */ +static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, + const struct vkd3d_sm1_opcode_info *opcode_info, uint32_t opcode_token) +{ + unsigned int length; + + /* Version 2.0+ shaders may contain address tokens, but fortunately they + * have a useful length mask - use it here. Version 1.x shaders contain no + * such tokens. 
*/ + if (sm1->p.shader_version.major >= 2) + { + length = (opcode_token & VKD3D_SM1_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; + *ptr += length; + return; + } + + *ptr += (opcode_info->dst_count + opcode_info->src_count); +} + +static void shader_sm1_destroy(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); + + free_shader_desc(&sm1->p.shader_desc); + vkd3d_free(sm1); +} + +static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, + struct vkd3d_shader_src_param *src_param, struct vkd3d_shader_src_param *src_rel_addr) +{ + uint32_t token, addr_token; + + shader_sm1_read_param(sm1, ptr, &token, &addr_token); + if (has_relative_address(token)) + shader_sm1_parse_src_param(addr_token, NULL, src_rel_addr); + else + src_rel_addr = NULL; + shader_sm1_parse_src_param(token, src_rel_addr, src_param); +} + +static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, + struct vkd3d_shader_dst_param *dst_param, struct vkd3d_shader_src_param *dst_rel_addr) +{ + uint32_t token, addr_token; + + shader_sm1_read_param(sm1, ptr, &token, &addr_token); + if (has_relative_address(token)) + shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr); + else + dst_rel_addr = NULL; + shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param); +} + +static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, + const uint32_t **ptr, struct vkd3d_shader_semantic *semantic) +{ + enum vkd3d_sm1_resource_type resource_type; + struct vkd3d_shader_register_range *range; + uint32_t usage_token, dst_token; + + if (*ptr >= sm1->end || sm1->end - *ptr < 2) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF, + "Attempted to read a declaration instruction, but not enough tokens are remaining."); + sm1->abort = true; + return; + } + + usage_token = read_u32(ptr); + dst_token = read_u32(ptr); + + 
semantic->usage = (usage_token & VKD3D_SM1_DCL_USAGE_MASK) >> VKD3D_SM1_DCL_USAGE_SHIFT; + semantic->usage_idx = (usage_token & VKD3D_SM1_DCL_USAGE_INDEX_MASK) >> VKD3D_SM1_DCL_USAGE_INDEX_SHIFT; + resource_type = (usage_token & VKD3D_SM1_RESOURCE_TYPE_MASK) >> VKD3D_SM1_RESOURCE_TYPE_SHIFT; + if (resource_type >= ARRAY_SIZE(resource_type_table)) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE, + "Invalid resource type %#x.", resource_type); + semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + else + { + semantic->resource_type = resource_type_table[resource_type]; + } + semantic->resource_data_type[0] = VKD3D_DATA_FLOAT; + semantic->resource_data_type[1] = VKD3D_DATA_FLOAT; + semantic->resource_data_type[2] = VKD3D_DATA_FLOAT; + semantic->resource_data_type[3] = VKD3D_DATA_FLOAT; + shader_sm1_parse_dst_param(dst_token, NULL, &semantic->resource.reg); + range = &semantic->resource.range; + range->space = 0; + range->first = range->last = semantic->resource.reg.reg.idx[0].offset; +} + +static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, + struct vkd3d_shader_src_param *src_param, enum vkd3d_immconst_type type, enum vkd3d_data_type data_type) +{ + unsigned int count = type == VKD3D_IMMCONST_VEC4 ? 4 : 1; + + if (*ptr >= sm1->end || sm1->end - *ptr < count) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF, + "Attempted to read a constant definition, but not enough tokens are remaining. 
" + "%zu token(s) available, %u required.", sm1->end - *ptr, count); + sm1->abort = true; + return; + } + + src_param->reg.type = VKD3DSPR_IMMCONST; + src_param->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + src_param->reg.non_uniform = false; + src_param->reg.data_type = data_type; + src_param->reg.idx[0].offset = ~0u; + src_param->reg.idx[0].rel_addr = NULL; + src_param->reg.idx[1].offset = ~0u; + src_param->reg.idx[1].rel_addr = NULL; + src_param->reg.idx[2].offset = ~0u; + src_param->reg.idx[2].rel_addr = NULL; + src_param->reg.immconst_type = type; + memcpy(src_param->reg.u.immconst_uint, *ptr, count * sizeof(uint32_t)); + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + src_param->modifiers = 0; + + *ptr += count; +} + +static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) +{ + const uint32_t **ptr = &sm1->p.ptr; + const char *comment; + unsigned int size; + size_t remaining; + uint32_t token; + + if (*ptr >= sm1->end) + return; + + remaining = sm1->end - *ptr; + + token = **ptr; + while ((token & VKD3D_SM1_OPCODE_MASK) == VKD3D_SM1_OP_COMMENT) + { + size = (token & VKD3D_SM1_COMMENT_SIZE_MASK) >> VKD3D_SM1_COMMENT_SIZE_SHIFT; + + if (size > --remaining) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF, + "Encountered a %u token comment, but only %zu token(s) is/are remaining.", + size, remaining); + return; + } + + comment = (const char *)++(*ptr); + remaining -= size; + *ptr += size; + + if (size > 1 && *(const uint32_t *)comment == TAG_TEXT) + { + const char *end = comment + size * sizeof(token); + const char *p = comment + sizeof(token); + const char *line; + + TRACE("// TEXT\n"); + for (line = p; line < end; line = p) + { + if (!(p = memchr(line, '\n', end - line))) + p = end; + else + ++p; + TRACE("// %s\n", debugstr_an(line, p - line)); + } + } + else if (size) + { + TRACE("// %s\n", debugstr_an(comment, size * sizeof(token))); + } + else + break; + + if (!remaining) + break; + token = 
**ptr; + } +} + +static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) +{ + if ((ins->handler_idx == VKD3DSIH_BREAKP || ins->handler_idx == VKD3DSIH_IF) && ins->flags) + { + vkd3d_shader_parser_warning(&sm1->p, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS, + "Ignoring unexpected instruction flags %#x.", ins->flags); + ins->flags = 0; + } +} + +static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); + const struct vkd3d_sm1_opcode_info *opcode_info; + const uint32_t **ptr = &parser->ptr; + uint32_t opcode_token; + const uint32_t *p; + unsigned int i; + + shader_sm1_read_comment(sm1); + + if (*ptr >= sm1->end) + { + WARN("End of byte-code, failed to read opcode.\n"); + goto fail; + } + + ++parser->location.line; + opcode_token = read_u32(ptr); + if (!(opcode_info = shader_sm1_get_opcode_info(sm1, opcode_token & VKD3D_SM1_OPCODE_MASK))) + { + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + "Invalid opcode %#x (token 0x%08x, shader version %u.%u).", + opcode_token & VKD3D_SM1_OPCODE_MASK, opcode_token, + sm1->p.shader_version.major, sm1->p.shader_version.minor); + goto fail; + } + + ins->handler_idx = opcode_info->vkd3d_opcode; + ins->flags = (opcode_token & VKD3D_SM1_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + ins->coissue = opcode_token & VKD3D_SM1_COISSUE; + ins->raw = false; + ins->structured = false; + ins->predicate = opcode_token & VKD3D_SM1_INSTRUCTION_PREDICATED ? 
&sm1->pred_param : NULL; + ins->dst_count = opcode_info->dst_count; + ins->dst = &sm1->dst_param; + ins->src_count = opcode_info->src_count; + ins->src = sm1->src_param; + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; + ins->resource_stride = 0; + ins->resource_data_type[0] = VKD3D_DATA_FLOAT; + ins->resource_data_type[1] = VKD3D_DATA_FLOAT; + ins->resource_data_type[2] = VKD3D_DATA_FLOAT; + ins->resource_data_type[3] = VKD3D_DATA_FLOAT; + memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); + + p = *ptr; + shader_sm1_skip_opcode(sm1, ptr, opcode_info, opcode_token); + if (*ptr > sm1->end) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF, + "The current instruction ends %zu token(s) past the end of the shader.", + *ptr - sm1->end); + goto fail; + } + + if (ins->handler_idx == VKD3DSIH_DCL) + { + shader_sm1_read_semantic(sm1, &p, &ins->declaration.semantic); + } + else if (ins->handler_idx == VKD3DSIH_DEF) + { + shader_sm1_read_dst_param(sm1, &p, &sm1->dst_param, &sm1->dst_rel_addr); + shader_sm1_read_immconst(sm1, &p, &sm1->src_param[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_FLOAT); + } + else if (ins->handler_idx == VKD3DSIH_DEFB) + { + shader_sm1_read_dst_param(sm1, &p, &sm1->dst_param, &sm1->dst_rel_addr); + shader_sm1_read_immconst(sm1, &p, &sm1->src_param[0], VKD3D_IMMCONST_SCALAR, VKD3D_DATA_UINT); + } + else if (ins->handler_idx == VKD3DSIH_DEFI) + { + shader_sm1_read_dst_param(sm1, &p, &sm1->dst_param, &sm1->dst_rel_addr); + shader_sm1_read_immconst(sm1, &p, &sm1->src_param[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_INT); + } + else + { + /* Destination token */ + if (ins->dst_count) + shader_sm1_read_dst_param(sm1, &p, &sm1->dst_param, &sm1->dst_rel_addr); + + /* Predication token */ + if (ins->predicate) + shader_sm1_read_src_param(sm1, &p, &sm1->pred_param, &sm1->pred_rel_addr); + + /* Other source tokens */ + for (i = 0; i < ins->src_count; ++i) + shader_sm1_read_src_param(sm1, &p, &sm1->src_param[i], &sm1->src_rel_addr[i]); + 
} + + if (sm1->abort) + { + sm1->abort = false; + goto fail; + } + + shader_sm1_validate_instruction(sm1, ins); + return; + +fail: + ins->handler_idx = VKD3DSIH_INVALID; + *ptr = sm1->end; +} + +static bool shader_sm1_is_end(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); + const uint32_t **ptr = &parser->ptr; + + shader_sm1_read_comment(sm1); + + if (*ptr >= sm1->end) + return true; + + if (**ptr == VKD3D_SM1_END) + { + ++(*ptr); + return true; + } + + return false; +} + +static void shader_sm1_reset(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); + + parser->ptr = sm1->start; + parser->failed = false; +} + +const struct vkd3d_shader_parser_ops shader_sm1_parser_ops = +{ + .parser_reset = shader_sm1_reset, + .parser_destroy = shader_sm1_destroy, + .parser_read_instruction = shader_sm1_read_instruction, + .parser_is_end = shader_sm1_is_end, +}; + +static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + const uint32_t *code = compile_info->source.code; + size_t code_size = compile_info->source.size; + struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_version version; + uint16_t shader_type; + size_t token_count; + + token_count = code_size / sizeof(*sm1->start); + + if (token_count < 2) + { + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF, + "Invalid shader size %zu (token count %zu). 
At least 2 tokens are required.", + code_size, token_count); + return VKD3D_ERROR_INVALID_SHADER; + } + + TRACE("Version: 0x%08x.\n", code[0]); + + shader_type = code[0] >> 16; + version.major = VKD3D_SM1_VERSION_MAJOR(code[0]); + version.minor = VKD3D_SM1_VERSION_MINOR(code[0]); + + switch (shader_type) + { + case VKD3D_SM1_VS: + version.type = VKD3D_SHADER_TYPE_VERTEX; + sm1->opcode_table = vs_opcode_table; + break; + + case VKD3D_SM1_PS: + version.type = VKD3D_SHADER_TYPE_PIXEL; + sm1->opcode_table = ps_opcode_table; + break; + + default: + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_D3DBC_INVALID_VERSION_TOKEN, + "Invalid shader type %#x (token 0x%08x).", shader_type, code[0]); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!shader_ver_le(&version, 3, 0)) + { + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_D3DBC_INVALID_VERSION_TOKEN, + "Invalid shader version %u.%u (token 0x%08x).", version.major, version.minor, code[0]); + return VKD3D_ERROR_INVALID_SHADER; + } + + sm1->start = &code[1]; + sm1->end = &code[token_count]; + + vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version, &shader_sm1_parser_ops); + shader_desc = &sm1->p.shader_desc; + shader_desc->byte_code = code; + shader_desc->byte_code_size = code_size; + sm1->p.ptr = sm1->start; + + return VKD3D_OK; +} + +int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +{ + struct vkd3d_shader_sm1_parser *sm1; + int ret; + + if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) + { + ERR("Failed to allocate parser.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = shader_sm1_init(sm1, compile_info, message_context)) < 0) + { + WARN("Failed to initialise shader parser, ret %d.\n", ret); + vkd3d_free(sm1); + return ret; + } + + *parser = &sm1->p; + + return VKD3D_OK; +} diff --git 
a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c new file mode 100644 index 00000000000..1ab71ca4714 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -0,0 +1,3287 @@ +/* + * Copyright 2008-2009 Henri Verbeet for CodeWeavers + * Copyright 2010 Rico Schüller + * Copyright 2017 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" +#include "sm4.h" + +void dxbc_writer_init(struct dxbc_writer *dxbc) +{ + memset(dxbc, 0, sizeof(*dxbc)); +} + +void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) +{ + struct dxbc_writer_section *section; + + assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); + + section = &dxbc->sections[dxbc->section_count++]; + section->tag = tag; + section->data = data; + section->size = size; +} + +int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) +{ + size_t size_position, offsets_position, checksum_position, i; + struct vkd3d_bytecode_buffer buffer = {0}; + uint32_t checksum[4]; + + put_u32(&buffer, TAG_DXBC); + + checksum_position = bytecode_get_size(&buffer); + for (i = 0; i < 4; ++i) + put_u32(&buffer, 0); + + put_u32(&buffer, 1); /* version */ + size_position = 
put_u32(&buffer, 0); + put_u32(&buffer, dxbc->section_count); + + offsets_position = bytecode_get_size(&buffer); + for (i = 0; i < dxbc->section_count; ++i) + put_u32(&buffer, 0); + + for (i = 0; i < dxbc->section_count; ++i) + { + set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_get_size(&buffer)); + put_u32(&buffer, dxbc->sections[i].tag); + put_u32(&buffer, dxbc->sections[i].size); + bytecode_put_bytes(&buffer, dxbc->sections[i].data, dxbc->sections[i].size); + } + set_u32(&buffer, size_position, bytecode_get_size(&buffer)); + + vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); + for (i = 0; i < 4; ++i) + set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); + + if (!buffer.status) + { + out->code = buffer.data; + out->size = buffer.size; + } + return buffer.status; +} + +struct vkd3d_shader_src_param_entry +{ + struct list entry; + struct vkd3d_shader_src_param param; +}; + +struct vkd3d_shader_sm4_parser +{ + const uint32_t *start, *end; + + unsigned int output_map[MAX_REG_OUTPUT]; + + struct vkd3d_shader_src_param src_param[6]; + struct vkd3d_shader_dst_param dst_param[2]; + struct list src_free; + struct list src; + struct vkd3d_shader_immediate_constant_buffer icb; + + struct vkd3d_shader_parser p; +}; + +struct vkd3d_sm4_opcode_info +{ + enum vkd3d_sm4_opcode opcode; + enum vkd3d_shader_opcode handler_idx; + const char *dst_info; + const char *src_info; + void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); +}; + +static const enum vkd3d_primitive_type output_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ 
VKD3D_PT_TRIANGLESTRIP, +}; + +static const enum vkd3d_primitive_type input_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, + /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, + /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, + /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, +}; + +static const enum vkd3d_shader_resource_type resource_type_table[] = +{ + /* 0 */ VKD3D_SHADER_RESOURCE_NONE, + /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, + /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, + /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, + /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, +}; + +static const enum vkd3d_data_type data_type_table[] = +{ + /* 0 */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, + /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, + /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, + /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, + /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, + /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, + /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, 
+ /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, +}; + +static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); +} + +static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) +{ + const struct vkd3d_shader_version *version = &sm4->p.shader_version; + + return version->major >= 5 && version->minor >= 1; +} + +static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); +static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); + +static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, + const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) +{ + *register_space = 0; + + if (!shader_is_sm_5_1(priv)) + return true; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + + *register_space = *(*ptr)++; + return true; +} + +static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, &priv->src_param[0]); + ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? 
+ VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; +} + +static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + enum vkd3d_sm4_shader_data_type type; + unsigned int icb_size; + + type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; + if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) + { + FIXME("Ignoring shader data type %#x.\n", type); + ins->handler_idx = VKD3DSIH_NOP; + return; + } + + ++tokens; + icb_size = token_count - 1; + if (icb_size % 4 || icb_size > MAX_IMMEDIATE_CONSTANT_BUFFER_SIZE) + { + FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + + priv->icb.vec4_count = icb_size / 4; + memcpy(priv->icb.data, tokens, sizeof(*tokens) * icb_size); + ins->declaration.icb = &priv->icb; +} + +static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, + const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) +{ + range->first = reg->idx[1].offset; + range->last = reg->idx[shader_is_sm_5_1(sm4) ? 
2 : 1].offset; + if (range->last < range->first) + { + FIXME("Invalid register range [%u:%u].\n", range->first, range->last); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, + "Last register %u must not be less than first register %u in range.\n", range->last, range->first); + } +} + +static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; + enum vkd3d_sm4_resource_type resource_type; + const uint32_t *end = &tokens[token_count]; + enum vkd3d_sm4_data_type data_type; + enum vkd3d_data_type reg_data_type; + DWORD components; + unsigned int i; + + resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; + if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) + { + FIXME("Unhandled resource type %#x.\n", resource_type); + semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + else + { + semantic->resource_type = resource_type_table[resource_type]; + } + reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; + shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); + + components = *tokens++; + for (i = 0; i < VKD3D_VEC4_SIZE; i++) + { + data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); + + if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) + { + FIXME("Unhandled data type %#x.\n", data_type); + semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; + } + else + { + semantic->resource_data_type[i] = data_type_table[data_type]; + } + } + + if (reg_data_type == VKD3D_DATA_UAV) + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + + shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); +} + +static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); + shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); + if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) + ins->flags |= VKD3DSI_INDEXED_DYNAMIC; + + ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; + ins->declaration.cb.range.space = 0; + + if (shader_is_sm_5_1(priv)) + { + if (tokens >= end) + { + FIXME("Invalid ptr %p >= end %p.\n", tokens, end); + return; + } + + ins->declaration.cb.size = *tokens++; + shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); + } +} + +static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + 
const uint32_t *end = &tokens[token_count]; + + ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; + if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) + FIXME("Unhandled sampler mode %#x.\n", ins->flags); + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); + shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); + shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); +} + +static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, + &ins->declaration.index_range.dst); + ins->declaration.index_range.register_count = *tokens; +} + +static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + enum vkd3d_sm4_output_primitive_type primitive_type; + + primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) + ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; + else + ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled output primitive type %#x.\n", primitive_type); +} + +static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + enum vkd3d_sm4_input_primitive_type primitive_type; + + primitive_type = 
(opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) + { + ins->declaration.primitive_type.type = VKD3D_PT_PATCH; + ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; + } + else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) + { + ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; + } + else + { + ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; + } + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled input primitive type %#x.\n", primitive_type); +} + +static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.count = *tokens; +} + +static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +} + +static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.register_semantic.reg); + ins->declaration.register_semantic.sysval_semantic = *tokens; +} + +static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & 
VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +} + +static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.register_semantic.reg); + ins->declaration.register_semantic.sysval_semantic = *tokens; +} + +static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.indexable_temp.register_idx = *tokens++; + ins->declaration.indexable_temp.register_size = *tokens++; + ins->declaration.indexable_temp.component_count = *tokens; +} + +static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; +} + +static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + priv->src_param[0].reg.u.fp_body_idx = *tokens++; + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &priv->src_param[0]); +} + +static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct 
vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.index = *tokens; +} + +static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.index = *tokens++; + FIXME("Ignoring set of function bodies (count %u).\n", *tokens); +} + +static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.fp.index = *tokens++; + ins->declaration.fp.body_count = *tokens++; + ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; + ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; + FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); +} + +static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) + >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; +} + +static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; +} + +static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> 
VKD3D_SM5_TESSELLATOR_SHIFT; +} + +static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; +} + +static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.max_tessellation_factor = *(float *)tokens; +} + +static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.thread_group_size.x = *tokens++; + ins->declaration.thread_group_size.y = *tokens++; + ins->declaration.thread_group_size.z = *tokens++; +} + +static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) 
+{ + struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + resource->byte_stride = *tokens++; + if (resource->byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); + ins->declaration.tgsm_raw.byte_count = *tokens; + if (ins->declaration.tgsm_raw.byte_count % 4) + FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); +} + +static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.tgsm_structured.reg); + ins->declaration.tgsm_structured.byte_stride = *tokens++; + ins->declaration.tgsm_structured.structure_count = *tokens; + if (ins->declaration.tgsm_structured.byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); +} + +static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct 
vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + resource->byte_stride = *tokens++; + if (resource->byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; +} + +/* + * d -> VKD3D_DATA_DOUBLE + * f -> VKD3D_DATA_FLOAT + * i -> VKD3D_DATA_INT + * u -> VKD3D_DATA_UINT + * O -> VKD3D_DATA_OPAQUE + * R -> VKD3D_DATA_RESOURCE + * S -> VKD3D_DATA_SAMPLER + * U -> VKD3D_DATA_UAV + */ +static const struct vkd3d_sm4_opcode_info opcode_table[] = +{ + {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, + {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, + {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, + {VKD3D_SM4_OP_BREAKC, 
VKD3DSIH_BREAKP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, + {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, + {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, + {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, + {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, + {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, + {VKD3D_SM4_OP_DISCARD, VKD3DSIH_TEXKILL, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, + {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, + {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, + {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, + {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, + {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, + {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, + {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, + {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, + {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, + {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, + {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, + {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, + {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, + {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, + {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, + {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, + {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, + {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, + {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, + {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, + {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, + {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, + {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, + {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, + {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, + {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, + {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, + {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, 
"", "O"}, + {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, + {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, + {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, + {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, + {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, + {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, + {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, + {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, + {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", + shader_sm4_read_shader_data}, + {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, + {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, + {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, + {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, + {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, + {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, + {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, + {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, + {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, + {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, + {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, + {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, + {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, + {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, + {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, + {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, + {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, + {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, + {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, + {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, + {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, + 
{VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, + {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, + {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, + {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, + {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, + {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "R", "", + shader_sm4_read_dcl_resource}, + {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", + shader_sm4_read_dcl_constant_buffer}, + {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", + shader_sm4_read_dcl_sampler}, + {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", + shader_sm4_read_dcl_index_range}, + {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", + shader_sm4_read_dcl_output_topology}, + {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", + shader_sm4_read_dcl_input_primitive}, + {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", + shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", + shader_sm4_read_dcl_input_ps}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", + shader_sm4_read_dcl_input_ps_siv}, + {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", + shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", + 
shader_sm4_read_dcl_indexable_temp}, + {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", + shader_sm4_read_dcl_global_flags}, + {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, + {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, + {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, + {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, + {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, + {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, + {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, + {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", + shader_sm5_read_fcall}, + {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, + {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, + {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, + {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, + {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, + {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, + {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, + {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, + {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, + {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, + {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, + {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, + 
{VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, + {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", + shader_sm5_read_dcl_function_body}, + {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", + shader_sm5_read_dcl_function_table}, + {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", + shader_sm5_read_dcl_interface}, + {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", + shader_sm5_read_dcl_tessellator_domain}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", + shader_sm5_read_dcl_tessellator_partitioning}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", + shader_sm5_read_dcl_tessellator_output_primitive}, + {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", + shader_sm5_read_dcl_hs_max_tessfactor}, + {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", + shader_sm5_read_dcl_thread_group}, + {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", + shader_sm4_read_dcl_resource}, + {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", + shader_sm5_read_dcl_uav_raw}, + {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", + shader_sm5_read_dcl_uav_structured}, + {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", + shader_sm5_read_dcl_tgsm_raw}, + 
{VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", + shader_sm5_read_dcl_tgsm_structured}, + {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", + shader_sm5_read_dcl_resource_raw}, + {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", + shader_sm5_read_dcl_resource_structured}, + {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, + {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, + {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, + {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, + {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, + {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, + {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, + {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, 
VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, + {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", + shader_sm5_read_sync}, + {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, + {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, + {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, + {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, + {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, + {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, + {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, + {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, + {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, + {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, + {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, + {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, + {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, + {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, + {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, + {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, + {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, + {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, + {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, + {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, + {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, + {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, + {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, + {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, + {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, + {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, + {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, + {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, + 
{VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, + {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, + {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, + {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, +}; + +static const enum vkd3d_shader_register_type register_type_table[] = +{ + /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, + /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, + /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, + /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, + /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, + /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, + /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, + /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, + /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, + /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, + /* UNKNOWN */ ~0u, + /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, + /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, + /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, + /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, + /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, + /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, + /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, + /* UNKNOWN */ ~0u, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, + /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, + /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, + /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, + /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, + /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, + /* VKD3D_SM5_RT_DOMAIN_LOCATION */ 
VKD3DSPR_TESSCOORD, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, + /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, + /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, + /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, + /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, + /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, + /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, + /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, + /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, + /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, + /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, + /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, +}; + +static const enum vkd3d_shader_register_precision register_precision_table[] = +{ + /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, + /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, +}; + +static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) +{ + unsigned int i; + + for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) + { + if (opcode == opcode_table[i].opcode) return &opcode_table[i]; + } + + return NULL; +} + +static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) +{ + switch (sm4->p.shader_version.type) + { + case VKD3D_SHADER_TYPE_PIXEL: + if (reg->type == VKD3DSPR_OUTPUT) + { + unsigned int reg_idx = reg->idx[0].offset; + + if (reg_idx >= ARRAY_SIZE(sm4->output_map)) + { + ERR("Invalid output index %u.\n", reg_idx); + break; + } + + reg->type = 
VKD3DSPR_COLOROUT; + reg->idx[0].offset = sm4->output_map[reg_idx]; + } + break; + + default: + break; + } +} + +static enum vkd3d_data_type map_data_type(char t) +{ + switch (t) + { + case 'd': + return VKD3D_DATA_DOUBLE; + case 'f': + return VKD3D_DATA_FLOAT; + case 'i': + return VKD3D_DATA_INT; + case 'u': + return VKD3D_DATA_UINT; + case 'O': + return VKD3D_DATA_OPAQUE; + case 'R': + return VKD3D_DATA_RESOURCE; + case 'S': + return VKD3D_DATA_SAMPLER; + case 'U': + return VKD3D_DATA_UAV; + default: + ERR("Invalid data type '%c'.\n", t); + return VKD3D_DATA_FLOAT; + } +} + +static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); + struct vkd3d_shader_src_param_entry *e1, *e2; + + list_move_head(&sm4->src_free, &sm4->src); + LIST_FOR_EACH_ENTRY_SAFE(e1, e2, &sm4->src_free, struct vkd3d_shader_src_param_entry, entry) + { + vkd3d_free(e1); + } + free_shader_desc(&parser->shader_desc); + vkd3d_free(sm4); +} + +static struct vkd3d_shader_src_param *get_src_param(struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_src_param_entry *e; + struct list *elem; + + if (!list_empty(&priv->src_free)) + { + elem = list_head(&priv->src_free); + list_remove(elem); + } + else + { + if (!(e = vkd3d_malloc(sizeof(*e)))) + return NULL; + elem = &e->entry; + } + + list_add_tail(&priv->src, elem); + e = LIST_ENTRY(elem, struct vkd3d_shader_src_param_entry, entry); + return &e->param; +} + +static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) +{ + if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) + { + struct vkd3d_shader_src_param *rel_addr = get_src_param(priv); + + if (!(reg_idx->rel_addr = rel_addr)) + { + ERR("Failed to get src param for relative addressing.\n"); + return false; + } + + if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) + reg_idx->offset = 
*(*ptr)++; + else + reg_idx->offset = 0; + shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); + } + else + { + reg_idx->rel_addr = NULL; + reg_idx->offset = *(*ptr)++; + } + + return true; +} + +static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) +{ + switch (register_type) + { + case VKD3D_SM4_RT_SAMPLER: + case VKD3D_SM4_RT_RESOURCE: + case VKD3D_SM4_RT_CONSTBUFFER: + case VKD3D_SM5_RT_UAV: + return true; + + default: + return false; + } +} + +static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, + enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) +{ + enum vkd3d_sm4_register_precision precision; + enum vkd3d_sm4_register_type register_type; + enum vkd3d_sm4_extended_operand_type type; + enum vkd3d_sm4_register_modifier m; + uint32_t token, order, extended; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + token = *(*ptr)++; + + register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; + if (register_type >= ARRAY_SIZE(register_type_table) + || register_type_table[register_type] == VKD3DSPR_INVALID) + { + FIXME("Unhandled register type %#x.\n", register_type); + param->type = VKD3DSPR_TEMP; + } + else + { + param->type = register_type_table[register_type]; + } + param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + param->non_uniform = false; + param->data_type = data_type; + + *modifier = VKD3DSPSM_NONE; + if (token & VKD3D_SM4_EXTENDED_OPERAND) + { + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + extended = *(*ptr)++; + + if (extended & VKD3D_SM4_EXTENDED_OPERAND) + { + FIXME("Skipping second-order extended operand.\n"); + *ptr += *ptr < end; + } + + type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; + if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) + { + m 
= (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; + switch (m) + { + case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: + *modifier = VKD3DSPSM_NEG; + break; + + case VKD3D_SM4_REGISTER_MODIFIER_ABS: + *modifier = VKD3DSPSM_ABS; + break; + + case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: + *modifier = VKD3DSPSM_ABSNEG; + break; + + default: + FIXME("Unhandled register modifier %#x.\n", m); + /* fall-through */ + case VKD3D_SM4_REGISTER_MODIFIER_NONE: + break; + } + + precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; + if (precision >= ARRAY_SIZE(register_precision_table) + || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) + { + FIXME("Unhandled register precision %#x.\n", precision); + param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; + } + else + { + param->precision = register_precision_table[precision]; + } + + if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) + param->non_uniform = true; + + extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK + | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK + | VKD3D_SM4_EXTENDED_OPERAND); + if (extended) + FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); + } + else if (type) + { + FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); + } + } + + order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; + + if (order < 1) + { + param->idx[0].offset = ~0u; + param->idx[0].rel_addr = NULL; + } + else + { + DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; + if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[0]))) + { + ERR("Failed to read register index.\n"); + return false; + } + } + + if (order < 2) + { + param->idx[1].offset = ~0u; + param->idx[1].rel_addr = NULL; + } + else + { + DWORD addressing = (token & 
VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; + if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[1]))) + { + ERR("Failed to read register index.\n"); + return false; + } + } + + if (order < 3) + { + param->idx[2].offset = ~0u; + param->idx[2].rel_addr = NULL; + } + else + { + DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; + if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[2]))) + { + ERR("Failed to read register index.\n"); + return false; + } + } + + if (order > 3) + { + WARN("Unhandled order %u.\n", order); + return false; + } + + if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) + { + enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; + unsigned int dword_count; + + switch (dimension) + { + case VKD3D_SM4_DIMENSION_SCALAR: + param->immconst_type = VKD3D_IMMCONST_SCALAR; + dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); + if (end - *ptr < dword_count) + { + WARN("Invalid ptr %p, end %p.\n", *ptr, end); + return false; + } + memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); + *ptr += dword_count; + break; + + case VKD3D_SM4_DIMENSION_VEC4: + param->immconst_type = VKD3D_IMMCONST_VEC4; + if (end - *ptr < VKD3D_VEC4_SIZE) + { + WARN("Invalid ptr %p, end %p.\n", *ptr, end); + return false; + } + memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); + *ptr += 4; + break; + + default: + FIXME("Unhandled dimension %#x.\n", dimension); + break; + } + } + else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) + { + /* SM5.1 places a symbol identifier in idx[0] and moves + * other values up one slot. Normalize to SM5.1. 
*/ + param->idx[2] = param->idx[1]; + param->idx[1] = param->idx[0]; + } + + map_register(priv, param); + + return true; +} + +static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) +{ + switch (reg->type) + { + case VKD3DSPR_COVERAGE: + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_PRIMID: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_OUTSTENCILREF: + return true; + default: + return false; + } +} + +static uint32_t swizzle_from_sm4(uint32_t s) +{ + return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); +} + +static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) +{ + DWORD token; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + token = **ptr; + + if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) + { + ERR("Failed to read parameter.\n"); + return false; + } + + if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) + { + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } + else + { + enum vkd3d_sm4_swizzle_type swizzle_type = + (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + + switch (swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: + if (shader_sm4_is_scalar_register(&src_param->reg)) + src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + else + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + break; + + case VKD3D_SM4_SWIZZLE_SCALAR: + src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; + src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; + break; + + case VKD3D_SM4_SWIZZLE_VEC4: + src_param->swizzle = swizzle_from_sm4((token & 
VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); + break; + + default: + FIXME("Unhandled swizzle type %#x.\n", swizzle_type); + break; + } + } + + return true; +} + +static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) +{ + enum vkd3d_shader_src_modifier modifier; + DWORD token; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + token = **ptr; + + if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) + { + ERR("Failed to read parameter.\n"); + return false; + } + + if (modifier != VKD3DSPSM_NONE) + { + ERR("Invalid source modifier %#x on destination register.\n", modifier); + return false; + } + + dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; + if (data_type == VKD3D_DATA_DOUBLE) + dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); + /* Scalar registers are declared with no write mask in shader bytecode. */ + if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + dst_param->modifiers = 0; + dst_param->shift = 0; + + return true; +} + +static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) +{ + enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; + + switch (modifier_type) + { + case VKD3D_SM4_MODIFIER_AOFFIMMI: + { + static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER + | VKD3D_SM4_MODIFIER_MASK + | VKD3D_SM4_AOFFIMMI_U_MASK + | VKD3D_SM4_AOFFIMMI_V_MASK + | VKD3D_SM4_AOFFIMMI_W_MASK; + + /* Bit fields are used for sign extension. 
*/ + struct + { + int u : 4; + int v : 4; + int w : 4; + } aoffimmi; + + if (modifier & ~recognized_bits) + FIXME("Unhandled instruction modifier %#x.\n", modifier); + + aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; + aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; + aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; + ins->texel_offset.u = aoffimmi.u; + ins->texel_offset.v = aoffimmi.v; + ins->texel_offset.w = aoffimmi.w; + break; + } + + case VKD3D_SM5_MODIFIER_DATA_TYPE: + { + DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; + unsigned int i; + + for (i = 0; i < VKD3D_VEC4_SIZE; i++) + { + enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); + + if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) + { + FIXME("Unhandled data type %#x.\n", data_type); + ins->resource_data_type[i] = VKD3D_DATA_FLOAT; + } + else + { + ins->resource_data_type[i] = data_type_table[data_type]; + } + } + break; + } + + case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: + { + enum vkd3d_sm4_resource_type resource_type + = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; + + if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) + ins->raw = true; + else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) + ins->structured = true; + + if (resource_type < ARRAY_SIZE(resource_type_table)) + ins->resource_type = resource_type_table[resource_type]; + else + { + FIXME("Unhandled resource type %#x.\n", resource_type); + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + + ins->resource_stride + = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; + break; + } + + default: + FIXME("Unhandled instruction modifier %#x.\n", modifier); + } +} + +static void shader_sm4_read_instruction(struct vkd3d_shader_parser *parser, 
struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); + const struct vkd3d_sm4_opcode_info *opcode_info; + uint32_t opcode_token, opcode, previous_token; + const uint32_t **ptr = &parser->ptr; + unsigned int i, len; + size_t remaining; + const uint32_t *p; + DWORD precise; + + list_move_head(&sm4->src_free, &sm4->src); + + if (*ptr >= sm4->end) + { + WARN("End of byte-code, failed to read opcode.\n"); + goto fail; + } + remaining = sm4->end - *ptr; + + ++parser->location.line; + + opcode_token = *(*ptr)++; + opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; + + len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); + if (!len) + { + if (remaining < 2) + { + WARN("End of byte-code, failed to read length token.\n"); + goto fail; + } + len = **ptr; + } + if (!len || remaining < len) + { + WARN("Read invalid length %u (remaining %zu).\n", len, remaining); + goto fail; + } + --len; + + if (!(opcode_info = get_opcode_info(opcode))) + { + FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); + ins->handler_idx = VKD3DSIH_INVALID; + *ptr += len; + return; + } + + ins->handler_idx = opcode_info->handler_idx; + ins->flags = 0; + ins->coissue = false; + ins->raw = false; + ins->structured = false; + ins->predicate = NULL; + ins->dst_count = strlen(opcode_info->dst_info); + ins->dst = sm4->dst_param; + ins->src_count = strlen(opcode_info->src_info); + ins->src = sm4->src_param; + assert(ins->dst_count <= ARRAY_SIZE(sm4->dst_param)); + assert(ins->src_count <= ARRAY_SIZE(sm4->src_param)); + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; + ins->resource_stride = 0; + ins->resource_data_type[0] = VKD3D_DATA_FLOAT; + ins->resource_data_type[1] = VKD3D_DATA_FLOAT; + ins->resource_data_type[2] = VKD3D_DATA_FLOAT; + ins->resource_data_type[3] = VKD3D_DATA_FLOAT; + memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); + + p = *ptr; + *ptr += len; + + if 
(opcode_info->read_opcode_func) + { + opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); + } + else + { + enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; + + previous_token = opcode_token; + while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) + shader_sm4_read_instruction_modifier(previous_token = *p++, ins); + + ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) + { + ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; + instruction_dst_modifier = VKD3DSPDM_SATURATE; + } + precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; + ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; + + for (i = 0; i < ins->dst_count; ++i) + { + if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), + &sm4->dst_param[i]))) + { + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + sm4->dst_param[i].modifiers |= instruction_dst_modifier; + } + + for (i = 0; i < ins->src_count; ++i) + { + if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), + &sm4->src_param[i]))) + { + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + } + } + + return; + +fail: + *ptr = sm4->end; + ins->handler_idx = VKD3DSIH_INVALID; + return; +} + +static bool shader_sm4_is_end(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); + + return parser->ptr == sm4->end; +} + +static void shader_sm4_reset(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); + + parser->ptr = sm4->start; + parser->failed = false; +} + +static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = +{ + .parser_reset = shader_sm4_reset, + .parser_destroy = shader_sm4_destroy, + .parser_read_instruction = shader_sm4_read_instruction, + .parser_is_end = 
shader_sm4_is_end, +}; + +static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, + size_t byte_code_size, const char *source_name, const struct vkd3d_shader_signature *output_signature, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_version version; + uint32_t version_token, token_count; + unsigned int i; + + if (byte_code_size / sizeof(*byte_code) < 2) + { + WARN("Invalid byte code size %lu.\n", (long)byte_code_size); + return false; + } + + version_token = byte_code[0]; + TRACE("Version: 0x%08x.\n", version_token); + token_count = byte_code[1]; + TRACE("Token count: %u.\n", token_count); + + if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) + { + WARN("Invalid token count %u.\n", token_count); + return false; + } + + sm4->start = &byte_code[2]; + sm4->end = &byte_code[token_count]; + + switch (version_token >> 16) + { + case VKD3D_SM4_PS: + version.type = VKD3D_SHADER_TYPE_PIXEL; + break; + + case VKD3D_SM4_VS: + version.type = VKD3D_SHADER_TYPE_VERTEX; + break; + + case VKD3D_SM4_GS: + version.type = VKD3D_SHADER_TYPE_GEOMETRY; + break; + + case VKD3D_SM5_HS: + version.type = VKD3D_SHADER_TYPE_HULL; + break; + + case VKD3D_SM5_DS: + version.type = VKD3D_SHADER_TYPE_DOMAIN; + break; + + case VKD3D_SM5_CS: + version.type = VKD3D_SHADER_TYPE_COMPUTE; + break; + + default: + FIXME("Unrecognised shader type %#x.\n", version_token >> 16); + } + version.major = VKD3D_SM4_VERSION_MAJOR(version_token); + version.minor = VKD3D_SM4_VERSION_MINOR(version_token); + + vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops); + sm4->p.ptr = sm4->start; + + memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); + for (i = 0; i < output_signature->element_count; ++i) + { + struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; + + if (version.type == VKD3D_SHADER_TYPE_PIXEL + && ascii_strcasecmp(e->semantic_name, 
"SV_Target")) + continue; + if (e->register_index >= ARRAY_SIZE(sm4->output_map)) + { + WARN("Invalid output index %u.\n", e->register_index); + continue; + } + + sm4->output_map[e->register_index] = e->semantic_index; + } + + list_init(&sm4->src_free); + list_init(&sm4->src); + + return true; +} + +static bool require_space(size_t offset, size_t count, size_t size, size_t data_size) +{ + return !count || (data_size - offset) / count >= size; +} + +static void read_dword(const char **ptr, uint32_t *d) +{ + memcpy(d, *ptr, sizeof(*d)); + *ptr += sizeof(*d); +} + +static void read_float(const char **ptr, float *f) +{ + STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); + read_dword(ptr, (uint32_t *)f); +} + +static void skip_dword_unknown(const char **ptr, unsigned int count) +{ + unsigned int i; + uint32_t d; + + WARN("Skipping %u unknown DWORDs:\n", count); + for (i = 0; i < count; ++i) + { + read_dword(ptr, &d); + WARN("\t0x%08x\n", d); + } +} + +static const char *shader_get_string(const char *data, size_t data_size, DWORD offset) +{ + size_t len, max_len; + + if (offset >= data_size) + { + WARN("Invalid offset %#x (data size %#lx).\n", offset, (long)data_size); + return NULL; + } + + max_len = data_size - offset; + len = strnlen(data + offset, max_len); + + if (len == max_len) + return NULL; + + return data + offset; +} + +static int parse_dxbc(const char *data, size_t data_size, + struct vkd3d_shader_message_context *message_context, const char *source_name, + int (*chunk_handler)(const char *data, DWORD data_size, DWORD tag, void *ctx), void *ctx) +{ + const struct vkd3d_shader_location location = {.source_name = source_name}; + uint32_t checksum[4], calculated_checksum[4]; + const char *ptr = data; + int ret = VKD3D_OK; + uint32_t chunk_count; + uint32_t total_size; + unsigned int i; + uint32_t version; + uint32_t tag; + + if (data_size < VKD3D_DXBC_HEADER_SIZE) + { + WARN("Invalid data size %zu.\n", data_size); + vkd3d_shader_error(message_context, &location, 
VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE, + "DXBC size %zu is smaller than the DXBC header size.", data_size); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + read_dword(&ptr, &tag); + TRACE("tag: %#x.\n", tag); + + if (tag != TAG_DXBC) + { + WARN("Wrong tag.\n"); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_MAGIC, "Invalid DXBC magic."); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + read_dword(&ptr, &checksum[0]); + read_dword(&ptr, &checksum[1]); + read_dword(&ptr, &checksum[2]); + read_dword(&ptr, &checksum[3]); + vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); + if (memcmp(checksum, calculated_checksum, sizeof(checksum))) + { + WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " + "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", + checksum[0], checksum[1], checksum[2], checksum[3], + calculated_checksum[0], calculated_checksum[1], + calculated_checksum[2], calculated_checksum[3]); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, + "Invalid DXBC checksum."); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + read_dword(&ptr, &version); + TRACE("version: %#x.\n", version); + if (version != 0x00000001) + { + WARN("Got unexpected DXBC version %#x.\n", version); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_VERSION, + "DXBC version %#x is not supported.", version); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + read_dword(&ptr, &total_size); + TRACE("total size: %#x\n", total_size); + + read_dword(&ptr, &chunk_count); + TRACE("chunk count: %#x\n", chunk_count); + + for (i = 0; i < chunk_count; ++i) + { + uint32_t chunk_tag, chunk_size; + const char *chunk_ptr; + uint32_t chunk_offset; + + read_dword(&ptr, &chunk_offset); + TRACE("chunk %u at offset %#x\n", i, chunk_offset); + + if (chunk_offset >= data_size || !require_space(chunk_offset, 2, sizeof(DWORD), data_size)) + { + WARN("Invalid chunk offset %#x (data size 
%zu).\n", chunk_offset, data_size); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_OFFSET, + "DXBC chunk %u has invalid offset %#x (data size %#zx).", i, chunk_offset, data_size); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + chunk_ptr = data + chunk_offset; + + read_dword(&chunk_ptr, &chunk_tag); + read_dword(&chunk_ptr, &chunk_size); + + if (!require_space(chunk_ptr - data, 1, chunk_size, data_size)) + { + WARN("Invalid chunk size %#x (data size %zu, chunk offset %#x).\n", + chunk_size, data_size, chunk_offset); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_SIZE, + "DXBC chunk %u has invalid size %#x (data size %#zx, chunk offset %#x).", + i, chunk_size, data_size, chunk_offset); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if ((ret = chunk_handler(chunk_ptr, chunk_size, chunk_tag, ctx)) < 0) + break; + } + + return ret; +} + +static int shader_parse_signature(DWORD tag, const char *data, DWORD data_size, + struct vkd3d_shader_signature *s) +{ + bool has_stream_index, has_min_precision; + struct vkd3d_shader_signature_element *e; + const char *ptr = data; + unsigned int i; + uint32_t count; + + if (!require_space(0, 2, sizeof(DWORD), data_size)) + { + WARN("Invalid data size %#x.\n", data_size); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + read_dword(&ptr, &count); + TRACE("%u elements.\n", count); + + skip_dword_unknown(&ptr, 1); /* It seems to always be 0x00000008. 
*/ + + if (!require_space(ptr - data, count, 6 * sizeof(DWORD), data_size)) + { + WARN("Invalid count %#x (data size %#x).\n", count, data_size); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if (!(e = vkd3d_calloc(count, sizeof(*e)))) + { + ERR("Failed to allocate input signature memory.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + has_min_precision = tag == TAG_OSG1 || tag == TAG_PSG1 || tag == TAG_ISG1; + has_stream_index = tag == TAG_OSG5 || has_min_precision; + + for (i = 0; i < count; ++i) + { + uint32_t name_offset, mask; + + if (has_stream_index) + read_dword(&ptr, &e[i].stream_index); + else + e[i].stream_index = 0; + + read_dword(&ptr, &name_offset); + if (!(e[i].semantic_name = shader_get_string(data, data_size, name_offset))) + { + WARN("Invalid name offset %#x (data size %#x).\n", name_offset, data_size); + vkd3d_free(e); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + read_dword(&ptr, &e[i].semantic_index); + read_dword(&ptr, &e[i].sysval_semantic); + read_dword(&ptr, &e[i].component_type); + read_dword(&ptr, &e[i].register_index); + read_dword(&ptr, &mask); + e[i].mask = mask & 0xff; + e[i].used_mask = (mask >> 8) & 0xff; + switch (tag) + { + case TAG_OSGN: + case TAG_OSG1: + case TAG_OSG5: + case TAG_PCSG: + case TAG_PSG1: + e[i].used_mask = e[i].mask & ~e[i].used_mask; + break; + } + + if (has_min_precision) + read_dword(&ptr, &e[i].min_precision); + else + e[i].min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; + + TRACE("Stream: %u, semantic: %s, semantic idx: %u, sysval_semantic %#x, " + "type %u, register idx: %u, use_mask %#x, input_mask %#x, precision %u.\n", + e[i].stream_index, debugstr_a(e[i].semantic_name), e[i].semantic_index, e[i].sysval_semantic, + e[i].component_type, e[i].register_index, e[i].used_mask, e[i].mask, e[i].min_precision); + } + + s->elements = e; + s->element_count = count; + + return VKD3D_OK; +} + +static int isgn_handler(const char *data, DWORD data_size, DWORD tag, void *ctx) +{ + struct vkd3d_shader_signature 
*is = ctx; + + if (tag != TAG_ISGN) + return VKD3D_OK; + + if (is->elements) + { + FIXME("Multiple input signatures.\n"); + vkd3d_shader_free_shader_signature(is); + } + return shader_parse_signature(tag, data, data_size, is); +} + +int shader_parse_input_signature(const void *dxbc, size_t dxbc_length, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature) +{ + int ret; + + memset(signature, 0, sizeof(*signature)); + if ((ret = parse_dxbc(dxbc, dxbc_length, message_context, NULL, isgn_handler, signature)) < 0) + ERR("Failed to parse input signature.\n"); + + return ret; +} + +static int shdr_handler(const char *data, DWORD data_size, DWORD tag, void *context) +{ + struct vkd3d_shader_desc *desc = context; + int ret; + + switch (tag) + { + case TAG_ISGN: + case TAG_ISG1: + if (desc->input_signature.elements) + { + FIXME("Multiple input signatures.\n"); + break; + } + if ((ret = shader_parse_signature(tag, data, data_size, &desc->input_signature)) < 0) + return ret; + break; + + case TAG_OSGN: + case TAG_OSG5: + case TAG_OSG1: + if (desc->output_signature.elements) + { + FIXME("Multiple output signatures.\n"); + break; + } + if ((ret = shader_parse_signature(tag, data, data_size, &desc->output_signature)) < 0) + return ret; + break; + + case TAG_PCSG: + case TAG_PSG1: + if (desc->patch_constant_signature.elements) + { + FIXME("Multiple patch constant signatures.\n"); + break; + } + if ((ret = shader_parse_signature(tag, data, data_size, &desc->patch_constant_signature)) < 0) + return ret; + break; + + case TAG_SHDR: + case TAG_SHEX: + if (desc->byte_code) + FIXME("Multiple shader code chunks.\n"); + desc->byte_code = (const uint32_t *)data; + desc->byte_code_size = data_size; + break; + + case TAG_AON9: + TRACE("Skipping AON9 shader code chunk.\n"); + break; + + case TAG_DXIL: + FIXME("Skipping DXIL shader model 6+ code chunk.\n"); + break; + + default: + TRACE("Skipping chunk %#x.\n", tag); + break; + } + + return VKD3D_OK; 
+} + +void free_shader_desc(struct vkd3d_shader_desc *desc) +{ + vkd3d_shader_free_shader_signature(&desc->input_signature); + vkd3d_shader_free_shader_signature(&desc->output_signature); + vkd3d_shader_free_shader_signature(&desc->patch_constant_signature); +} + +static int shader_extract_from_dxbc(const void *dxbc, size_t dxbc_length, + struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) +{ + int ret; + + desc->byte_code = NULL; + desc->byte_code_size = 0; + memset(&desc->input_signature, 0, sizeof(desc->input_signature)); + memset(&desc->output_signature, 0, sizeof(desc->output_signature)); + memset(&desc->patch_constant_signature, 0, sizeof(desc->patch_constant_signature)); + + ret = parse_dxbc(dxbc, dxbc_length, message_context, source_name, shdr_handler, desc); + if (!desc->byte_code) + ret = VKD3D_ERROR_INVALID_ARGUMENT; + + if (ret < 0) + { + WARN("Failed to parse shader, vkd3d result %d.\n", ret); + free_shader_desc(desc); + } + + return ret; +} + +int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +{ + struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_sm4_parser *sm4; + int ret; + + if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) + { + ERR("Failed to allocate parser.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + shader_desc = &sm4->p.shader_desc; + if ((ret = shader_extract_from_dxbc(compile_info->source.code, compile_info->source.size, + message_context, compile_info->source_name, shader_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm4); + return ret; + } + + if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, + compile_info->source_name, &shader_desc->output_signature, message_context)) + { + WARN("Failed to initialise shader parser.\n"); + free_shader_desc(shader_desc); + 
vkd3d_free(sm4); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + *parser = &sm4->p; + + return VKD3D_OK; +} + +/* root signatures */ +#define VKD3D_ROOT_SIGNATURE_1_0_ROOT_DESCRIPTOR_FLAGS VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE + +#define VKD3D_ROOT_SIGNATURE_1_0_DESCRIPTOR_RANGE_FLAGS \ + (VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE | VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE) + +struct root_signature_parser_context +{ + const char *data; + unsigned int data_size; +}; + +static int shader_parse_descriptor_ranges(struct root_signature_parser_context *context, + unsigned int offset, unsigned int count, struct vkd3d_shader_descriptor_range *ranges) +{ + const char *ptr; + unsigned int i; + + if (!require_space(offset, 5 * count, sizeof(DWORD), context->data_size)) + { + WARN("Invalid data size %#x (offset %u, count %u).\n", context->data_size, offset, count); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = &context->data[offset]; + + for (i = 0; i < count; ++i) + { + read_dword(&ptr, &ranges[i].range_type); + read_dword(&ptr, &ranges[i].descriptor_count); + read_dword(&ptr, &ranges[i].base_shader_register); + read_dword(&ptr, &ranges[i].register_space); + read_dword(&ptr, &ranges[i].descriptor_table_offset); + + TRACE("Type %#x, descriptor count %u, base shader register %u, " + "register space %u, offset %u.\n", + ranges[i].range_type, ranges[i].descriptor_count, + ranges[i].base_shader_register, ranges[i].register_space, + ranges[i].descriptor_table_offset); + } + + return VKD3D_OK; +} + +static void shader_validate_descriptor_range1(const struct vkd3d_shader_descriptor_range1 *range) +{ + unsigned int unknown_flags = range->flags & ~(VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_NONE + | VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE + | VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE + | VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE + | VKD3D_SHADER_DESCRIPTOR_RANGE_FLAG_DATA_STATIC); + + if (unknown_flags) + 
FIXME("Unknown descriptor range flags %#x.\n", unknown_flags); +} + +static int shader_parse_descriptor_ranges1(struct root_signature_parser_context *context, + unsigned int offset, unsigned int count, struct vkd3d_shader_descriptor_range1 *ranges) +{ + const char *ptr; + unsigned int i; + + if (!require_space(offset, 6 * count, sizeof(uint32_t), context->data_size)) + { + WARN("Invalid data size %#x (offset %u, count %u).\n", context->data_size, offset, count); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = &context->data[offset]; + + for (i = 0; i < count; ++i) + { + read_dword(&ptr, &ranges[i].range_type); + read_dword(&ptr, &ranges[i].descriptor_count); + read_dword(&ptr, &ranges[i].base_shader_register); + read_dword(&ptr, &ranges[i].register_space); + read_dword(&ptr, &ranges[i].flags); + read_dword(&ptr, &ranges[i].descriptor_table_offset); + + TRACE("Type %#x, descriptor count %u, base shader register %u, " + "register space %u, flags %#x, offset %u.\n", + ranges[i].range_type, ranges[i].descriptor_count, + ranges[i].base_shader_register, ranges[i].register_space, + ranges[i].flags, ranges[i].descriptor_table_offset); + + shader_validate_descriptor_range1(&ranges[i]); + } + + return VKD3D_OK; +} + +static int shader_parse_descriptor_table(struct root_signature_parser_context *context, + unsigned int offset, struct vkd3d_shader_root_descriptor_table *table) +{ + struct vkd3d_shader_descriptor_range *ranges; + unsigned int count; + const char *ptr; + + if (!require_space(offset, 2, sizeof(DWORD), context->data_size)) + { + WARN("Invalid data size %#x (offset %u).\n", context->data_size, offset); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = &context->data[offset]; + + read_dword(&ptr, &count); + read_dword(&ptr, &offset); + + TRACE("Descriptor range count %u.\n", count); + + table->descriptor_range_count = count; + + if (!(ranges = vkd3d_calloc(count, sizeof(*ranges)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + table->descriptor_ranges = ranges; + return 
shader_parse_descriptor_ranges(context, offset, count, ranges); +} + +static int shader_parse_descriptor_table1(struct root_signature_parser_context *context, + unsigned int offset, struct vkd3d_shader_root_descriptor_table1 *table) +{ + struct vkd3d_shader_descriptor_range1 *ranges; + unsigned int count; + const char *ptr; + + if (!require_space(offset, 2, sizeof(DWORD), context->data_size)) + { + WARN("Invalid data size %#x (offset %u).\n", context->data_size, offset); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = &context->data[offset]; + + read_dword(&ptr, &count); + read_dword(&ptr, &offset); + + TRACE("Descriptor range count %u.\n", count); + + table->descriptor_range_count = count; + + if (!(ranges = vkd3d_calloc(count, sizeof(*ranges)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + table->descriptor_ranges = ranges; + return shader_parse_descriptor_ranges1(context, offset, count, ranges); +} + +static int shader_parse_root_constants(struct root_signature_parser_context *context, + unsigned int offset, struct vkd3d_shader_root_constants *constants) +{ + const char *ptr; + + if (!require_space(offset, 3, sizeof(DWORD), context->data_size)) + { + WARN("Invalid data size %#x (offset %u).\n", context->data_size, offset); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = &context->data[offset]; + + read_dword(&ptr, &constants->shader_register); + read_dword(&ptr, &constants->register_space); + read_dword(&ptr, &constants->value_count); + + TRACE("Shader register %u, register space %u, 32-bit value count %u.\n", + constants->shader_register, constants->register_space, constants->value_count); + + return VKD3D_OK; +} + +static int shader_parse_root_descriptor(struct root_signature_parser_context *context, + unsigned int offset, struct vkd3d_shader_root_descriptor *descriptor) +{ + const char *ptr; + + if (!require_space(offset, 2, sizeof(DWORD), context->data_size)) + { + WARN("Invalid data size %#x (offset %u).\n", context->data_size, offset); + return 
VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = &context->data[offset]; + + read_dword(&ptr, &descriptor->shader_register); + read_dword(&ptr, &descriptor->register_space); + + TRACE("Shader register %u, register space %u.\n", + descriptor->shader_register, descriptor->register_space); + + return VKD3D_OK; +} + +static void shader_validate_root_descriptor1(const struct vkd3d_shader_root_descriptor1 *descriptor) +{ + unsigned int unknown_flags = descriptor->flags & ~(VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_NONE + | VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE + | VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE + | VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_STATIC); + + if (unknown_flags) + FIXME("Unknown root descriptor flags %#x.\n", unknown_flags); +} + +static int shader_parse_root_descriptor1(struct root_signature_parser_context *context, + unsigned int offset, struct vkd3d_shader_root_descriptor1 *descriptor) +{ + const char *ptr; + + if (!require_space(offset, 3, sizeof(DWORD), context->data_size)) + { + WARN("Invalid data size %#x (offset %u).\n", context->data_size, offset); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = &context->data[offset]; + + read_dword(&ptr, &descriptor->shader_register); + read_dword(&ptr, &descriptor->register_space); + read_dword(&ptr, &descriptor->flags); + + TRACE("Shader register %u, register space %u, flags %#x.\n", + descriptor->shader_register, descriptor->register_space, descriptor->flags); + + shader_validate_root_descriptor1(descriptor); + + return VKD3D_OK; +} + +static int shader_parse_root_parameters(struct root_signature_parser_context *context, + unsigned int offset, unsigned int count, struct vkd3d_shader_root_parameter *parameters) +{ + const char *ptr; + unsigned int i; + int ret; + + if (!require_space(offset, 3 * count, sizeof(DWORD), context->data_size)) + { + WARN("Invalid data size %#x (offset %u, count %u).\n", context->data_size, offset, count); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = 
&context->data[offset]; + + for (i = 0; i < count; ++i) + { + read_dword(&ptr, ¶meters[i].parameter_type); + read_dword(&ptr, ¶meters[i].shader_visibility); + read_dword(&ptr, &offset); + + TRACE("Type %#x, shader visibility %#x.\n", + parameters[i].parameter_type, parameters[i].shader_visibility); + + switch (parameters[i].parameter_type) + { + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + ret = shader_parse_descriptor_table(context, offset, ¶meters[i].u.descriptor_table); + break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + ret = shader_parse_root_constants(context, offset, ¶meters[i].u.constants); + break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_CBV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_SRV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_UAV: + ret = shader_parse_root_descriptor(context, offset, ¶meters[i].u.descriptor); + break; + default: + FIXME("Unrecognized type %#x.\n", parameters[i].parameter_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if (ret < 0) + return ret; + } + + return VKD3D_OK; +} + +static int shader_parse_root_parameters1(struct root_signature_parser_context *context, + uint32_t offset, DWORD count, struct vkd3d_shader_root_parameter1 *parameters) +{ + const char *ptr; + unsigned int i; + int ret; + + if (!require_space(offset, 3 * count, sizeof(DWORD), context->data_size)) + { + WARN("Invalid data size %#x (offset %u, count %u).\n", context->data_size, offset, count); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = &context->data[offset]; + + for (i = 0; i < count; ++i) + { + read_dword(&ptr, ¶meters[i].parameter_type); + read_dword(&ptr, ¶meters[i].shader_visibility); + read_dword(&ptr, &offset); + + TRACE("Type %#x, shader visibility %#x.\n", + parameters[i].parameter_type, parameters[i].shader_visibility); + + switch (parameters[i].parameter_type) + { + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + ret = shader_parse_descriptor_table1(context, offset, ¶meters[i].u.descriptor_table); + break; + 
case VKD3D_SHADER_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + ret = shader_parse_root_constants(context, offset, ¶meters[i].u.constants); + break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_CBV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_SRV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_UAV: + ret = shader_parse_root_descriptor1(context, offset, ¶meters[i].u.descriptor); + break; + default: + FIXME("Unrecognized type %#x.\n", parameters[i].parameter_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if (ret < 0) + return ret; + } + + return VKD3D_OK; +} + +static int shader_parse_static_samplers(struct root_signature_parser_context *context, + unsigned int offset, unsigned int count, struct vkd3d_shader_static_sampler_desc *sampler_descs) +{ + const char *ptr; + unsigned int i; + + if (!require_space(offset, 13 * count, sizeof(DWORD), context->data_size)) + { + WARN("Invalid data size %#x (offset %u, count %u).\n", context->data_size, offset, count); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ptr = &context->data[offset]; + + for (i = 0; i < count; ++i) + { + read_dword(&ptr, &sampler_descs[i].filter); + read_dword(&ptr, &sampler_descs[i].address_u); + read_dword(&ptr, &sampler_descs[i].address_v); + read_dword(&ptr, &sampler_descs[i].address_w); + read_float(&ptr, &sampler_descs[i].mip_lod_bias); + read_dword(&ptr, &sampler_descs[i].max_anisotropy); + read_dword(&ptr, &sampler_descs[i].comparison_func); + read_dword(&ptr, &sampler_descs[i].border_colour); + read_float(&ptr, &sampler_descs[i].min_lod); + read_float(&ptr, &sampler_descs[i].max_lod); + read_dword(&ptr, &sampler_descs[i].shader_register); + read_dword(&ptr, &sampler_descs[i].register_space); + read_dword(&ptr, &sampler_descs[i].shader_visibility); + } + + return VKD3D_OK; +} + +static int shader_parse_root_signature(const char *data, unsigned int data_size, + struct vkd3d_shader_versioned_root_signature_desc *desc) +{ + struct vkd3d_shader_root_signature_desc *v_1_0 = &desc->u.v_1_0; + struct 
root_signature_parser_context context; + unsigned int count, offset, version; + const char *ptr = data; + int ret; + + context.data = data; + context.data_size = data_size; + + if (!require_space(0, 6, sizeof(DWORD), data_size)) + { + WARN("Invalid data size %#x.\n", data_size); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + read_dword(&ptr, &version); + TRACE("Version %#x.\n", version); + if (version != VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0 && version != VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1) + { + FIXME("Unknown version %#x.\n", version); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + desc->version = version; + + read_dword(&ptr, &count); + read_dword(&ptr, &offset); + TRACE("Parameter count %u, offset %u.\n", count, offset); + + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + { + v_1_0->parameter_count = count; + if (v_1_0->parameter_count) + { + struct vkd3d_shader_root_parameter *parameters; + if (!(parameters = vkd3d_calloc(v_1_0->parameter_count, sizeof(*parameters)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + v_1_0->parameters = parameters; + if ((ret = shader_parse_root_parameters(&context, offset, count, parameters)) < 0) + return ret; + } + } + else + { + struct vkd3d_shader_root_signature_desc1 *v_1_1 = &desc->u.v_1_1; + + assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); + + v_1_1->parameter_count = count; + if (v_1_1->parameter_count) + { + struct vkd3d_shader_root_parameter1 *parameters; + if (!(parameters = vkd3d_calloc(v_1_1->parameter_count, sizeof(*parameters)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + v_1_1->parameters = parameters; + if ((ret = shader_parse_root_parameters1(&context, offset, count, parameters)) < 0) + return ret; + } + } + + read_dword(&ptr, &count); + read_dword(&ptr, &offset); + TRACE("Static sampler count %u, offset %u.\n", count, offset); + + v_1_0->static_sampler_count = count; + if (v_1_0->static_sampler_count) + { + struct vkd3d_shader_static_sampler_desc *samplers; + if (!(samplers = 
vkd3d_calloc(v_1_0->static_sampler_count, sizeof(*samplers)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + v_1_0->static_samplers = samplers; + if ((ret = shader_parse_static_samplers(&context, offset, count, samplers)) < 0) + return ret; + } + + read_dword(&ptr, &v_1_0->flags); + TRACE("Flags %#x.\n", v_1_0->flags); + + return VKD3D_OK; +} + +static int rts0_handler(const char *data, DWORD data_size, DWORD tag, void *context) +{ + struct vkd3d_shader_versioned_root_signature_desc *desc = context; + + if (tag != TAG_RTS0) + return VKD3D_OK; + + return shader_parse_root_signature(data, data_size, desc); +} + +int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_versioned_root_signature_desc *root_signature, char **messages) +{ + struct vkd3d_shader_message_context message_context; + int ret; + + TRACE("dxbc {%p, %zu}, root_signature %p, messages %p.\n", dxbc->code, dxbc->size, root_signature, messages); + + memset(root_signature, 0, sizeof(*root_signature)); + if (messages) + *messages = NULL; + vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); + + ret = parse_dxbc(dxbc->code, dxbc->size, &message_context, NULL, rts0_handler, root_signature); + vkd3d_shader_message_context_trace_messages(&message_context); + if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_message_context_cleanup(&message_context); + if (ret < 0) + vkd3d_shader_free_root_signature(root_signature); + + return ret; +} + +static unsigned int versioned_root_signature_get_parameter_count( + const struct vkd3d_shader_versioned_root_signature_desc *desc) +{ + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + return desc->u.v_1_0.parameter_count; + else + return desc->u.v_1_1.parameter_count; +} + +static enum vkd3d_shader_root_parameter_type versioned_root_signature_get_parameter_type( + const struct vkd3d_shader_versioned_root_signature_desc *desc, 
unsigned int i) +{ + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + return desc->u.v_1_0.parameters[i].parameter_type; + else + return desc->u.v_1_1.parameters[i].parameter_type; +} + +static enum vkd3d_shader_visibility versioned_root_signature_get_parameter_shader_visibility( + const struct vkd3d_shader_versioned_root_signature_desc *desc, unsigned int i) +{ + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + return desc->u.v_1_0.parameters[i].shader_visibility; + else + return desc->u.v_1_1.parameters[i].shader_visibility; +} + +static const struct vkd3d_shader_root_constants *versioned_root_signature_get_root_constants( + const struct vkd3d_shader_versioned_root_signature_desc *desc, unsigned int i) +{ + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + return &desc->u.v_1_0.parameters[i].u.constants; + else + return &desc->u.v_1_1.parameters[i].u.constants; +} + +static unsigned int versioned_root_signature_get_static_sampler_count( + const struct vkd3d_shader_versioned_root_signature_desc *desc) +{ + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + return desc->u.v_1_0.static_sampler_count; + else + return desc->u.v_1_1.static_sampler_count; +} + +static const struct vkd3d_shader_static_sampler_desc *versioned_root_signature_get_static_samplers( + const struct vkd3d_shader_versioned_root_signature_desc *desc) +{ + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + return desc->u.v_1_0.static_samplers; + else + return desc->u.v_1_1.static_samplers; +} + +static unsigned int versioned_root_signature_get_flags(const struct vkd3d_shader_versioned_root_signature_desc *desc) +{ + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + return desc->u.v_1_0.flags; + else + return desc->u.v_1_1.flags; +} + +struct root_signature_writer_context +{ + struct vkd3d_shader_message_context message_context; + + struct vkd3d_bytecode_buffer buffer; + + size_t total_size_position; + size_t 
chunk_position; +}; + +static size_t get_chunk_offset(struct root_signature_writer_context *context) +{ + return bytecode_get_size(&context->buffer) - context->chunk_position; +} + +static void shader_write_root_signature_header(struct root_signature_writer_context *context) +{ + struct vkd3d_bytecode_buffer *buffer = &context->buffer; + unsigned int i; + + put_u32(buffer, TAG_DXBC); + + /* The checksum is computed when all data is generated. */ + for (i = 0; i < 4; ++i) + put_u32(buffer, 0); + put_u32(buffer, 1); + context->total_size_position = put_u32(buffer, 0xffffffff); + put_u32(buffer, 1); /* chunk count */ + put_u32(buffer, bytecode_get_size(buffer) + sizeof(uint32_t)); /* chunk offset */ + put_u32(buffer, TAG_RTS0); + put_u32(buffer, 0xffffffff); + context->chunk_position = bytecode_get_size(buffer); +} + +static void shader_write_descriptor_ranges(struct vkd3d_bytecode_buffer *buffer, + const struct vkd3d_shader_root_descriptor_table *table) +{ + const struct vkd3d_shader_descriptor_range *ranges = table->descriptor_ranges; + unsigned int i; + + for (i = 0; i < table->descriptor_range_count; ++i) + { + put_u32(buffer, ranges[i].range_type); + put_u32(buffer, ranges[i].descriptor_count); + put_u32(buffer, ranges[i].base_shader_register); + put_u32(buffer, ranges[i].register_space); + put_u32(buffer, ranges[i].descriptor_table_offset); + } +} + +static void shader_write_descriptor_ranges1(struct vkd3d_bytecode_buffer *buffer, + const struct vkd3d_shader_root_descriptor_table1 *table) +{ + const struct vkd3d_shader_descriptor_range1 *ranges = table->descriptor_ranges; + unsigned int i; + + for (i = 0; i < table->descriptor_range_count; ++i) + { + put_u32(buffer, ranges[i].range_type); + put_u32(buffer, ranges[i].descriptor_count); + put_u32(buffer, ranges[i].base_shader_register); + put_u32(buffer, ranges[i].register_space); + put_u32(buffer, ranges[i].flags); + put_u32(buffer, ranges[i].descriptor_table_offset); + } +} + +static void 
shader_write_descriptor_table(struct root_signature_writer_context *context, + const struct vkd3d_shader_root_descriptor_table *table) +{ + struct vkd3d_bytecode_buffer *buffer = &context->buffer; + + put_u32(buffer, table->descriptor_range_count); + put_u32(buffer, get_chunk_offset(context) + sizeof(uint32_t)); /* offset */ + + shader_write_descriptor_ranges(buffer, table); +} + +static void shader_write_descriptor_table1(struct root_signature_writer_context *context, + const struct vkd3d_shader_root_descriptor_table1 *table) +{ + struct vkd3d_bytecode_buffer *buffer = &context->buffer; + + put_u32(buffer, table->descriptor_range_count); + put_u32(buffer, get_chunk_offset(context) + sizeof(uint32_t)); /* offset */ + + shader_write_descriptor_ranges1(buffer, table); +} + +static void shader_write_root_constants(struct vkd3d_bytecode_buffer *buffer, + const struct vkd3d_shader_root_constants *constants) +{ + put_u32(buffer, constants->shader_register); + put_u32(buffer, constants->register_space); + put_u32(buffer, constants->value_count); +} + +static void shader_write_root_descriptor(struct vkd3d_bytecode_buffer *buffer, + const struct vkd3d_shader_root_descriptor *descriptor) +{ + put_u32(buffer, descriptor->shader_register); + put_u32(buffer, descriptor->register_space); +} + +static void shader_write_root_descriptor1(struct vkd3d_bytecode_buffer *buffer, + const struct vkd3d_shader_root_descriptor1 *descriptor) +{ + put_u32(buffer, descriptor->shader_register); + put_u32(buffer, descriptor->register_space); + put_u32(buffer, descriptor->flags); +} + +static int shader_write_root_parameters(struct root_signature_writer_context *context, + const struct vkd3d_shader_versioned_root_signature_desc *desc) +{ + unsigned int parameter_count = versioned_root_signature_get_parameter_count(desc); + struct vkd3d_bytecode_buffer *buffer = &context->buffer; + size_t parameters_position; + unsigned int i; + + parameters_position = bytecode_get_size(buffer); + for (i = 0; i < 
parameter_count; ++i) + { + put_u32(buffer, versioned_root_signature_get_parameter_type(desc, i)); + put_u32(buffer, versioned_root_signature_get_parameter_shader_visibility(desc, i)); + put_u32(buffer, 0xffffffff); /* offset */ + } + + for (i = 0; i < parameter_count; ++i) + { + set_u32(buffer, parameters_position + ((3 * i + 2) * sizeof(uint32_t)), get_chunk_offset(context)); + + switch (versioned_root_signature_get_parameter_type(desc, i)) + { + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + shader_write_descriptor_table(context, &desc->u.v_1_0.parameters[i].u.descriptor_table); + else + shader_write_descriptor_table1(context, &desc->u.v_1_1.parameters[i].u.descriptor_table); + break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + shader_write_root_constants(buffer, versioned_root_signature_get_root_constants(desc, i)); + break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_CBV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_SRV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_UAV: + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + shader_write_root_descriptor(buffer, &desc->u.v_1_0.parameters[i].u.descriptor); + else + shader_write_root_descriptor1(buffer, &desc->u.v_1_1.parameters[i].u.descriptor); + break; + default: + FIXME("Unrecognized type %#x.\n", versioned_root_signature_get_parameter_type(desc, i)); + vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE, + "Invalid/unrecognised root signature root parameter type %#x.", + versioned_root_signature_get_parameter_type(desc, i)); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + } + + return VKD3D_OK; +} + +static void shader_write_static_samplers(struct vkd3d_bytecode_buffer *buffer, + const struct vkd3d_shader_versioned_root_signature_desc *desc) +{ + const struct vkd3d_shader_static_sampler_desc *samplers = versioned_root_signature_get_static_samplers(desc); + unsigned int i; + + 
for (i = 0; i < versioned_root_signature_get_static_sampler_count(desc); ++i) + { + put_u32(buffer, samplers[i].filter); + put_u32(buffer, samplers[i].address_u); + put_u32(buffer, samplers[i].address_v); + put_u32(buffer, samplers[i].address_w); + put_f32(buffer, samplers[i].mip_lod_bias); + put_u32(buffer, samplers[i].max_anisotropy); + put_u32(buffer, samplers[i].comparison_func); + put_u32(buffer, samplers[i].border_colour); + put_f32(buffer, samplers[i].min_lod); + put_f32(buffer, samplers[i].max_lod); + put_u32(buffer, samplers[i].shader_register); + put_u32(buffer, samplers[i].register_space); + put_u32(buffer, samplers[i].shader_visibility); + } +} + +static int shader_write_root_signature(struct root_signature_writer_context *context, + const struct vkd3d_shader_versioned_root_signature_desc *desc) +{ + struct vkd3d_bytecode_buffer *buffer = &context->buffer; + size_t samplers_offset_position; + int ret; + + put_u32(buffer, desc->version); + put_u32(buffer, versioned_root_signature_get_parameter_count(desc)); + put_u32(buffer, get_chunk_offset(context) + 4 * sizeof(uint32_t)); /* offset */ + put_u32(buffer, versioned_root_signature_get_static_sampler_count(desc)); + samplers_offset_position = put_u32(buffer, 0xffffffff); + put_u32(buffer, versioned_root_signature_get_flags(desc)); + + if ((ret = shader_write_root_parameters(context, desc)) < 0) + return ret; + + set_u32(buffer, samplers_offset_position, get_chunk_offset(context)); + shader_write_static_samplers(buffer, desc); + return 0; +} + +static int validate_descriptor_table_v_1_0(const struct vkd3d_shader_root_descriptor_table *descriptor_table, + struct vkd3d_shader_message_context *message_context) +{ + bool have_srv_uav_cbv = false; + bool have_sampler = false; + unsigned int i; + + for (i = 0; i < descriptor_table->descriptor_range_count; ++i) + { + const struct vkd3d_shader_descriptor_range *r = &descriptor_table->descriptor_ranges[i]; + + if (r->range_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV + 
|| r->range_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV + || r->range_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) + { + have_srv_uav_cbv = true; + } + else if (r->range_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER) + { + have_sampler = true; + } + else + { + WARN("Invalid descriptor range type %#x.\n", r->range_type); + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_RS_INVALID_DESCRIPTOR_RANGE_TYPE, + "Invalid root signature descriptor range type %#x.", r->range_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + } + + if (have_srv_uav_cbv && have_sampler) + { + WARN("Samplers cannot be mixed with CBVs/SRVs/UAVs in descriptor tables.\n"); + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_RS_MIXED_DESCRIPTOR_RANGE_TYPES, + "Encountered both CBV/SRV/UAV and sampler descriptor ranges in the same root descriptor table."); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + return VKD3D_OK; +} + +static int validate_descriptor_table_v_1_1(const struct vkd3d_shader_root_descriptor_table1 *descriptor_table, + struct vkd3d_shader_message_context *message_context) +{ + bool have_srv_uav_cbv = false; + bool have_sampler = false; + unsigned int i; + + for (i = 0; i < descriptor_table->descriptor_range_count; ++i) + { + const struct vkd3d_shader_descriptor_range1 *r = &descriptor_table->descriptor_ranges[i]; + + if (r->range_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV + || r->range_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV + || r->range_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) + { + have_srv_uav_cbv = true; + } + else if (r->range_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER) + { + have_sampler = true; + } + else + { + WARN("Invalid descriptor range type %#x.\n", r->range_type); + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_RS_INVALID_DESCRIPTOR_RANGE_TYPE, + "Invalid root signature descriptor range type %#x.", r->range_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + } + + if (have_srv_uav_cbv && have_sampler) + { + WARN("Samplers cannot be 
mixed with CBVs/SRVs/UAVs in descriptor tables.\n"); + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_RS_MIXED_DESCRIPTOR_RANGE_TYPES, + "Encountered both CBV/SRV/UAV and sampler descriptor ranges in the same root descriptor table."); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + return VKD3D_OK; +} + +static int validate_root_signature_desc(const struct vkd3d_shader_versioned_root_signature_desc *desc, + struct vkd3d_shader_message_context *message_context) +{ + int ret = VKD3D_OK; + unsigned int i; + + for (i = 0; i < versioned_root_signature_get_parameter_count(desc); ++i) + { + enum vkd3d_shader_root_parameter_type type; + + type = versioned_root_signature_get_parameter_type(desc, i); + if (type == VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + { + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + ret = validate_descriptor_table_v_1_0(&desc->u.v_1_0.parameters[i].u.descriptor_table, message_context); + else + ret = validate_descriptor_table_v_1_1(&desc->u.v_1_1.parameters[i].u.descriptor_table, message_context); + } + + if (ret < 0) + break; + } + + return ret; +} + +int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_root_signature_desc *root_signature, + struct vkd3d_shader_code *dxbc, char **messages) +{ + struct root_signature_writer_context context; + size_t total_size, chunk_size; + uint32_t checksum[4]; + unsigned int i; + int ret; + + TRACE("root_signature %p, dxbc %p, messages %p.\n", root_signature, dxbc, messages); + + if (messages) + *messages = NULL; + + memset(&context, 0, sizeof(context)); + vkd3d_shader_message_context_init(&context.message_context, VKD3D_SHADER_LOG_INFO); + + if (root_signature->version != VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0 + && root_signature->version != VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1) + { + ret = VKD3D_ERROR_INVALID_ARGUMENT; + WARN("Root signature version %#x not supported.\n", root_signature->version); + vkd3d_shader_error(&context.message_context, 
NULL, VKD3D_SHADER_ERROR_RS_INVALID_VERSION, + "Root signature version %#x is not supported.", root_signature->version); + goto done; + } + + if ((ret = validate_root_signature_desc(root_signature, &context.message_context)) < 0) + goto done; + + memset(dxbc, 0, sizeof(*dxbc)); + shader_write_root_signature_header(&context); + + if ((ret = shader_write_root_signature(&context, root_signature)) < 0) + { + vkd3d_free(context.buffer.data); + goto done; + } + + if (context.buffer.status) + { + vkd3d_shader_error(&context.message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, + "Out of memory while writing root signature."); + vkd3d_free(context.buffer.data); + goto done; + } + + total_size = bytecode_get_size(&context.buffer); + chunk_size = get_chunk_offset(&context); + set_u32(&context.buffer, context.total_size_position, total_size); + set_u32(&context.buffer, context.chunk_position - sizeof(uint32_t), chunk_size); + + dxbc->code = context.buffer.data; + dxbc->size = total_size; + + vkd3d_compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); + for (i = 0; i < 4; ++i) + set_u32(&context.buffer, (i + 1) * sizeof(uint32_t), checksum[i]); + + ret = VKD3D_OK; + +done: + vkd3d_shader_message_context_trace_messages(&context.message_context); + if (!vkd3d_shader_message_context_copy_messages(&context.message_context, messages)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + vkd3d_shader_message_context_cleanup(&context.message_context); + return ret; +} + +static void free_descriptor_ranges(const struct vkd3d_shader_root_parameter *parameters, unsigned int count) +{ + unsigned int i; + + if (!parameters) + return; + + for (i = 0; i < count; ++i) + { + const struct vkd3d_shader_root_parameter *p = ¶meters[i]; + + if (p->parameter_type == VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + vkd3d_free((void *)p->u.descriptor_table.descriptor_ranges); + } +} + +static int convert_root_parameters_to_v_1_0(struct vkd3d_shader_root_parameter *dst, + const struct 
vkd3d_shader_root_parameter1 *src, unsigned int count) +{ + const struct vkd3d_shader_descriptor_range1 *ranges1; + struct vkd3d_shader_descriptor_range *ranges; + unsigned int i, j; + int ret; + + for (i = 0; i < count; ++i) + { + const struct vkd3d_shader_root_parameter1 *p1 = &src[i]; + struct vkd3d_shader_root_parameter *p = &dst[i]; + + p->parameter_type = p1->parameter_type; + switch (p->parameter_type) + { + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + ranges = NULL; + if ((p->u.descriptor_table.descriptor_range_count = p1->u.descriptor_table.descriptor_range_count)) + { + if (!(ranges = vkd3d_calloc(p->u.descriptor_table.descriptor_range_count, sizeof(*ranges)))) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + } + p->u.descriptor_table.descriptor_ranges = ranges; + ranges1 = p1->u.descriptor_table.descriptor_ranges; + for (j = 0; j < p->u.descriptor_table.descriptor_range_count; ++j) + { + ranges[j].range_type = ranges1[j].range_type; + ranges[j].descriptor_count = ranges1[j].descriptor_count; + ranges[j].base_shader_register = ranges1[j].base_shader_register; + ranges[j].register_space = ranges1[j].register_space; + ranges[j].descriptor_table_offset = ranges1[j].descriptor_table_offset; + } + break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + p->u.constants = p1->u.constants; + break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_CBV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_SRV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_UAV: + p->u.descriptor.shader_register = p1->u.descriptor.shader_register; + p->u.descriptor.register_space = p1->u.descriptor.register_space; + break; + default: + WARN("Invalid root parameter type %#x.\n", p->parameter_type); + ret = VKD3D_ERROR_INVALID_ARGUMENT; + goto fail; + + } + p->shader_visibility = p1->shader_visibility; + } + + return VKD3D_OK; + +fail: + free_descriptor_ranges(dst, i); + return ret; +} + +static int convert_root_signature_to_v1_0(struct vkd3d_shader_versioned_root_signature_desc 
*dst, + const struct vkd3d_shader_versioned_root_signature_desc *src) +{ + const struct vkd3d_shader_root_signature_desc1 *src_desc = &src->u.v_1_1; + struct vkd3d_shader_root_signature_desc *dst_desc = &dst->u.v_1_0; + struct vkd3d_shader_static_sampler_desc *samplers = NULL; + struct vkd3d_shader_root_parameter *parameters = NULL; + int ret; + + if ((dst_desc->parameter_count = src_desc->parameter_count)) + { + if (!(parameters = vkd3d_calloc(dst_desc->parameter_count, sizeof(*parameters)))) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + if ((ret = convert_root_parameters_to_v_1_0(parameters, src_desc->parameters, src_desc->parameter_count)) < 0) + goto fail; + } + dst_desc->parameters = parameters; + if ((dst_desc->static_sampler_count = src_desc->static_sampler_count)) + { + if (!(samplers = vkd3d_calloc(dst_desc->static_sampler_count, sizeof(*samplers)))) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + memcpy(samplers, src_desc->static_samplers, src_desc->static_sampler_count * sizeof(*samplers)); + } + dst_desc->static_samplers = samplers; + dst_desc->flags = src_desc->flags; + + return VKD3D_OK; + +fail: + free_descriptor_ranges(parameters, dst_desc->parameter_count); + vkd3d_free(parameters); + vkd3d_free(samplers); + return ret; +} + +static void free_descriptor_ranges1(const struct vkd3d_shader_root_parameter1 *parameters, unsigned int count) +{ + unsigned int i; + + if (!parameters) + return; + + for (i = 0; i < count; ++i) + { + const struct vkd3d_shader_root_parameter1 *p = ¶meters[i]; + + if (p->parameter_type == VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + vkd3d_free((void *)p->u.descriptor_table.descriptor_ranges); + } +} + +static int convert_root_parameters_to_v_1_1(struct vkd3d_shader_root_parameter1 *dst, + const struct vkd3d_shader_root_parameter *src, unsigned int count) +{ + const struct vkd3d_shader_descriptor_range *ranges; + struct vkd3d_shader_descriptor_range1 *ranges1; + unsigned int i, j; + int ret; + + for 
(i = 0; i < count; ++i) + { + const struct vkd3d_shader_root_parameter *p = &src[i]; + struct vkd3d_shader_root_parameter1 *p1 = &dst[i]; + + p1->parameter_type = p->parameter_type; + switch (p1->parameter_type) + { + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + ranges1 = NULL; + if ((p1->u.descriptor_table.descriptor_range_count = p->u.descriptor_table.descriptor_range_count)) + { + if (!(ranges1 = vkd3d_calloc(p1->u.descriptor_table.descriptor_range_count, sizeof(*ranges1)))) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + } + p1->u.descriptor_table.descriptor_ranges = ranges1; + ranges = p->u.descriptor_table.descriptor_ranges; + for (j = 0; j < p1->u.descriptor_table.descriptor_range_count; ++j) + { + ranges1[j].range_type = ranges[j].range_type; + ranges1[j].descriptor_count = ranges[j].descriptor_count; + ranges1[j].base_shader_register = ranges[j].base_shader_register; + ranges1[j].register_space = ranges[j].register_space; + ranges1[j].flags = VKD3D_ROOT_SIGNATURE_1_0_DESCRIPTOR_RANGE_FLAGS; + ranges1[j].descriptor_table_offset = ranges[j].descriptor_table_offset; + } + break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + p1->u.constants = p->u.constants; + break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_CBV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_SRV: + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_UAV: + p1->u.descriptor.shader_register = p->u.descriptor.shader_register; + p1->u.descriptor.register_space = p->u.descriptor.register_space; + p1->u.descriptor.flags = VKD3D_ROOT_SIGNATURE_1_0_ROOT_DESCRIPTOR_FLAGS; + break; + default: + WARN("Invalid root parameter type %#x.\n", p1->parameter_type); + ret = VKD3D_ERROR_INVALID_ARGUMENT; + goto fail; + + } + p1->shader_visibility = p->shader_visibility; + } + + return VKD3D_OK; + +fail: + free_descriptor_ranges1(dst, i); + return ret; +} + +static int convert_root_signature_to_v1_1(struct vkd3d_shader_versioned_root_signature_desc *dst, + const struct 
vkd3d_shader_versioned_root_signature_desc *src) +{ + const struct vkd3d_shader_root_signature_desc *src_desc = &src->u.v_1_0; + struct vkd3d_shader_root_signature_desc1 *dst_desc = &dst->u.v_1_1; + struct vkd3d_shader_static_sampler_desc *samplers = NULL; + struct vkd3d_shader_root_parameter1 *parameters = NULL; + int ret; + + if ((dst_desc->parameter_count = src_desc->parameter_count)) + { + if (!(parameters = vkd3d_calloc(dst_desc->parameter_count, sizeof(*parameters)))) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + if ((ret = convert_root_parameters_to_v_1_1(parameters, src_desc->parameters, src_desc->parameter_count)) < 0) + goto fail; + } + dst_desc->parameters = parameters; + if ((dst_desc->static_sampler_count = src_desc->static_sampler_count)) + { + if (!(samplers = vkd3d_calloc(dst_desc->static_sampler_count, sizeof(*samplers)))) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + memcpy(samplers, src_desc->static_samplers, src_desc->static_sampler_count * sizeof(*samplers)); + } + dst_desc->static_samplers = samplers; + dst_desc->flags = src_desc->flags; + + return VKD3D_OK; + +fail: + free_descriptor_ranges1(parameters, dst_desc->parameter_count); + vkd3d_free(parameters); + vkd3d_free(samplers); + return ret; +} + +int vkd3d_shader_convert_root_signature(struct vkd3d_shader_versioned_root_signature_desc *dst, + enum vkd3d_shader_root_signature_version version, const struct vkd3d_shader_versioned_root_signature_desc *src) +{ + int ret; + + TRACE("dst %p, version %#x, src %p.\n", dst, version, src); + + if (src->version == version) + { + WARN("Nothing to convert.\n"); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if (version != VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0 && version != VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1) + { + WARN("Root signature version %#x not supported.\n", version); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if (src->version != VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0 + && src->version != 
VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1) + { + WARN("Root signature version %#x not supported.\n", src->version); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + memset(dst, 0, sizeof(*dst)); + dst->version = version; + + if (version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + { + ret = convert_root_signature_to_v1_0(dst, src); + } + else + { + assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); + ret = convert_root_signature_to_v1_1(dst, src); + } + + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c new file mode 100644 index 00000000000..56fa7043a3f --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -0,0 +1,137 @@ +/* + * Copyright 2021 Atharva Nimbalkar + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +struct vkd3d_glsl_generator +{ + struct vkd3d_shader_version version; + struct vkd3d_string_buffer buffer; + const struct vkd3d_shader_location *location; + struct vkd3d_shader_message_context *message_context; + bool failed; +}; + +struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, + struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) +{ + struct vkd3d_glsl_generator *generator; + + if (!(generator = vkd3d_malloc(sizeof(*generator)))) + return NULL; + + memset(generator, 0, sizeof(*generator)); + generator->version = *version; + vkd3d_string_buffer_init(&generator->buffer); + generator->location = location; + generator->message_context = message_context; + return generator; +} + +static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( + struct vkd3d_glsl_generator *generator, + enum vkd3d_shader_error error, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vkd3d_shader_verror(generator->message_context, generator->location, error, fmt, args); + va_end(args); + generator->failed = true; +} + +static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, + const struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_shader_version *version = &generator->version; + + /* + * TODO: Implement in_subroutine + * TODO: shader_glsl_generate_shader_epilogue(generator); + */ + if (version->major >= 4) + { + vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); + } +} + +static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->handler_idx) + { + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_OUTPUT: + case VKD3DSIH_DCL_OUTPUT_SIV: + break; + case VKD3DSIH_RET: + shader_glsl_ret(generator, instruction); + break; + default: + vkd3d_glsl_compiler_error(generator, + VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Unhandled instruction %#x", instruction->handler_idx); + break; + } +} + +int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, + struct vkd3d_shader_parser *parser, struct vkd3d_shader_code *out) +{ + void *code; + struct vkd3d_shader_instruction ins; + + vkd3d_string_buffer_printf(&generator->buffer, "#version 440\n\n"); + vkd3d_string_buffer_printf(&generator->buffer, "void main()\n{\n"); + + while (!vkd3d_shader_parser_is_end(parser)) + { + vkd3d_shader_parser_read_instruction(parser, &ins); + + if (ins.handler_idx == VKD3DSIH_INVALID) + { + vkd3d_glsl_compiler_error(generator, + VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Encountered unrecognized or invalid instruction."); + break; + } + + vkd3d_glsl_handle_instruction(generator, &ins); + } + + if (parser->failed || generator->failed) + return VKD3D_ERROR_INVALID_SHADER; + + vkd3d_string_buffer_printf(&generator->buffer, "}\n"); + + if ((code = vkd3d_malloc(generator->buffer.buffer_size))) + { + 
memcpy(code, generator->buffer.buffer, generator->buffer.content_size); + out->size = generator->buffer.content_size; + out->code = code; + } + else return VKD3D_ERROR_OUT_OF_MEMORY; + + return VKD3D_OK; +} + +void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator) +{ + vkd3d_string_buffer_cleanup(&generator->buffer); + vkd3d_free(generator); +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c new file mode 100644 index 00000000000..ea5e35d20de --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -0,0 +1,2029 @@ +/* + * HLSL utility functions + * + * Copyright 2012 Matteo Bruni for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "hlsl.h" +#include + +void hlsl_note(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_log_level level, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vkd3d_shader_vnote(ctx->message_context, loc, level, fmt, args); + va_end(args); +} + +void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vkd3d_shader_verror(ctx->message_context, loc, error, fmt, args); + va_end(args); + + if (!ctx->result) + ctx->result = VKD3D_ERROR_INVALID_SHADER; +} + +void hlsl_warning(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vkd3d_shader_vwarning(ctx->message_context, loc, error, fmt, args); + va_end(args); +} + +void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, const char *fmt, ...) +{ + struct vkd3d_string_buffer *string; + va_list args; + + va_start(args, fmt); + string = hlsl_get_string_buffer(ctx); + vkd3d_string_buffer_printf(string, "Aborting due to not yet implemented feature: "); + vkd3d_string_buffer_vprintf(string, fmt, args); + vkd3d_shader_error(ctx->message_context, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "%s", string->buffer); + hlsl_release_string_buffer(ctx, string); + va_end(args); + + if (!ctx->result) + ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED; +} + +bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var) +{ + struct hlsl_scope *scope = ctx->cur_scope; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (!strcmp(decl->name, var->name)) + return false; + } + if (local_var && scope->upper->upper == ctx->globals) + { + /* Check whether the variable redefines a function parameter. 
*/ + LIST_FOR_EACH_ENTRY(var, &scope->upper->vars, struct hlsl_ir_var, scope_entry) + { + if (!strcmp(decl->name, var->name)) + return false; + } + } + + list_add_tail(&scope->vars, &decl->scope_entry); + return true; +} + +struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) +{ + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (!strcmp(name, var->name)) + return var; + } + if (!scope->upper) + return NULL; + return hlsl_get_var(scope->upper, name); +} + +void hlsl_free_var(struct hlsl_ir_var *decl) +{ + vkd3d_free((void *)decl->name); + vkd3d_free((void *)decl->semantic.name); + vkd3d_free(decl); +} + +static bool hlsl_type_is_row_major(const struct hlsl_type *type) +{ + /* Default to column-major if the majority isn't explicitly set, which can + * happen for anonymous nodes. */ + return !!(type->modifiers & HLSL_MODIFIER_ROW_MAJOR); +} + +static unsigned int get_array_size(const struct hlsl_type *type) +{ + if (type->type == HLSL_CLASS_ARRAY) + return get_array_size(type->e.array.type) * type->e.array.elements_count; + return 1; +} + +unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset) +{ + /* Align to the next vec4 boundary if: + * (a) the type is a struct or array type, or + * (b) the type would cross a vec4 boundary; i.e. a vec3 and a + * vec1 can be packed together, but not a vec3 and a vec2. + */ + if (type->type > HLSL_CLASS_LAST_NUMERIC || (offset & 3) + type->reg_size > 4) + return align(offset, 4); + return offset; +} + +static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type *type) +{ + bool is_sm4 = (ctx->profile->major_version >= 4); + + switch (type->type) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + type->reg_size = is_sm4 ? type->dimx : 4; + break; + + case HLSL_CLASS_MATRIX: + if (hlsl_type_is_row_major(type)) + type->reg_size = is_sm4 ? 
(4 * (type->dimy - 1) + type->dimx) : (4 * type->dimy); + else + type->reg_size = is_sm4 ? (4 * (type->dimx - 1) + type->dimy) : (4 * type->dimx); + break; + + case HLSL_CLASS_ARRAY: + { + unsigned int element_size = type->e.array.type->reg_size; + + assert(element_size); + if (is_sm4) + type->reg_size = (type->e.array.elements_count - 1) * align(element_size, 4) + element_size; + else + type->reg_size = type->e.array.elements_count * element_size; + break; + } + + case HLSL_CLASS_STRUCT: + { + struct hlsl_struct_field *field; + + type->dimx = 0; + type->reg_size = 0; + + LIST_FOR_EACH_ENTRY(field, type->e.elements, struct hlsl_struct_field, entry) + { + unsigned int field_size = field->type->reg_size; + + assert(field_size); + + type->reg_size = hlsl_type_get_sm4_offset(field->type, type->reg_size); + field->reg_offset = type->reg_size; + type->reg_size += field_size; + + type->dimx += field->type->dimx * field->type->dimy * get_array_size(field->type); + } + break; + } + + case HLSL_CLASS_OBJECT: + /* For convenience when performing copy propagation. 
*/ + type->reg_size = 1; + break; + } +} + +static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, enum hlsl_type_class type_class, + enum hlsl_base_type base_type, unsigned dimx, unsigned dimy) +{ + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + if (!(type->name = hlsl_strdup(ctx, name))) + { + vkd3d_free(type); + return NULL; + } + type->type = type_class; + type->base_type = base_type; + type->dimx = dimx; + type->dimy = dimy; + hlsl_type_calculate_reg_size(ctx, type); + + list_add_tail(&ctx->types, &type->entry); + + return type; +} + +struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size) +{ + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + + type->type = HLSL_CLASS_ARRAY; + type->modifiers = basic_type->modifiers; + type->e.array.elements_count = array_size; + type->e.array.type = basic_type; + type->dimx = basic_type->dimx; + type->dimy = basic_type->dimy; + hlsl_type_calculate_reg_size(ctx, type); + + list_add_tail(&ctx->types, &type->entry); + + return type; +} + +struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct list *fields) +{ + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->type = HLSL_CLASS_STRUCT; + type->base_type = HLSL_TYPE_VOID; + type->name = name; + type->dimy = 1; + type->e.elements = fields; + hlsl_type_calculate_reg_size(ctx, type); + + list_add_tail(&ctx->types, &type->entry); + + return type; +} + +struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format) +{ + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->type = HLSL_CLASS_OBJECT; + type->base_type = HLSL_TYPE_TEXTURE; + type->dimx = 4; + type->dimy = 1; + type->sampler_dim = dim; + type->e.resource_format = format; + 
hlsl_type_calculate_reg_size(ctx, type); + list_add_tail(&ctx->types, &type->entry); + return type; +} + +struct hlsl_type *hlsl_get_type(struct hlsl_scope *scope, const char *name, bool recursive) +{ + struct rb_entry *entry = rb_get(&scope->types, name); + + if (entry) + return RB_ENTRY_VALUE(entry, struct hlsl_type, scope_entry); + + if (recursive && scope->upper) + return hlsl_get_type(scope->upper, name, recursive); + return NULL; +} + +bool hlsl_get_function(struct hlsl_ctx *ctx, const char *name) +{ + return rb_get(&ctx->functions, name) != NULL; +} + +struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const char *name) +{ + struct hlsl_ir_function_decl *decl; + struct hlsl_ir_function *func; + struct rb_entry *entry; + + if ((entry = rb_get(&ctx->functions, name))) + { + func = RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry); + RB_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) + return decl; + } + + return NULL; +} + +unsigned int hlsl_type_component_count(struct hlsl_type *type) +{ + struct hlsl_struct_field *field; + unsigned int count = 0; + + if (type->type <= HLSL_CLASS_LAST_NUMERIC) + { + return type->dimx * type->dimy; + } + if (type->type == HLSL_CLASS_ARRAY) + { + return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; + } + if (type->type != HLSL_CLASS_STRUCT) + { + ERR("Unexpected data type %#x.\n", type->type); + return 0; + } + + LIST_FOR_EACH_ENTRY(field, type->e.elements, struct hlsl_struct_field, entry) + { + count += hlsl_type_component_count(field->type); + } + return count; +} + +bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2) +{ + if (t1 == t2) + return true; + + if (t1->type != t2->type) + return false; + if (t1->base_type != t2->base_type) + return false; + if (t1->base_type == HLSL_TYPE_SAMPLER || t1->base_type == HLSL_TYPE_TEXTURE) + { + if (t1->sampler_dim != t2->sampler_dim) + return false; + if (t1->base_type == 
HLSL_TYPE_TEXTURE && t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC + && !hlsl_types_are_equal(t1->e.resource_format, t2->e.resource_format)) + return false; + } + if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) + != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) + return false; + if (t1->dimx != t2->dimx) + return false; + if (t1->dimy != t2->dimy) + return false; + if (t1->type == HLSL_CLASS_STRUCT) + { + struct list *t1cur, *t2cur; + struct hlsl_struct_field *t1field, *t2field; + + t1cur = list_head(t1->e.elements); + t2cur = list_head(t2->e.elements); + while (t1cur && t2cur) + { + t1field = LIST_ENTRY(t1cur, struct hlsl_struct_field, entry); + t2field = LIST_ENTRY(t2cur, struct hlsl_struct_field, entry); + if (!hlsl_types_are_equal(t1field->type, t2field->type)) + return false; + if (strcmp(t1field->name, t2field->name)) + return false; + t1cur = list_next(t1->e.elements, t1cur); + t2cur = list_next(t2->e.elements, t2cur); + } + if (t1cur != t2cur) + return false; + } + if (t1->type == HLSL_CLASS_ARRAY) + return t1->e.array.elements_count == t2->e.array.elements_count + && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); + + return true; +} + +struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + unsigned int default_majority, unsigned int modifiers) +{ + struct hlsl_struct_field *old_field, *field; + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + + if (old->name) + { + type->name = hlsl_strdup(ctx, old->name); + if (!type->name) + { + vkd3d_free(type); + return NULL; + } + } + type->type = old->type; + type->base_type = old->base_type; + type->dimx = old->dimx; + type->dimy = old->dimy; + type->modifiers = old->modifiers | modifiers; + if (!(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) + type->modifiers |= default_majority; + type->sampler_dim = old->sampler_dim; + switch (old->type) + { + case HLSL_CLASS_ARRAY: + type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, 
default_majority, modifiers); + type->e.array.elements_count = old->e.array.elements_count; + break; + + case HLSL_CLASS_STRUCT: + { + if (!(type->e.elements = hlsl_alloc(ctx, sizeof(*type->e.elements)))) + { + vkd3d_free((void *)type->name); + vkd3d_free(type); + return NULL; + } + list_init(type->e.elements); + LIST_FOR_EACH_ENTRY(old_field, old->e.elements, struct hlsl_struct_field, entry) + { + if (!(field = hlsl_alloc(ctx, sizeof(*field)))) + { + LIST_FOR_EACH_ENTRY_SAFE(field, old_field, type->e.elements, struct hlsl_struct_field, entry) + { + vkd3d_free((void *)field->semantic.name); + vkd3d_free((void *)field->name); + vkd3d_free(field); + } + vkd3d_free(type->e.elements); + vkd3d_free((void *)type->name); + vkd3d_free(type); + return NULL; + } + field->loc = old_field->loc; + field->type = hlsl_type_clone(ctx, old_field->type, default_majority, modifiers); + field->name = hlsl_strdup(ctx, old_field->name); + if (old_field->semantic.name) + { + field->semantic.name = hlsl_strdup(ctx, old_field->semantic.name); + field->semantic.index = old_field->semantic.index; + } + list_add_tail(type->e.elements, &field->entry); + } + break; + } + + default: + break; + } + + hlsl_type_calculate_reg_size(ctx, type); + + list_add_tail(&ctx->types, &type->entry); + return type; +} + +bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) +{ + if (hlsl_get_type(scope, type->name, false)) + return false; + + rb_put(&scope->types, type->name, &type->scope_entry); + return true; +} + +struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *cast; + + cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, *loc); + if (cast) + cast->data_type = type; + return hlsl_ir_expr(cast); +} + +struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) +{ + /* Use a cast to the same type as a makeshift identity expression. 
*/ + return hlsl_new_cast(ctx, node, node->data_type, &node->loc); +} + +struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, + const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, + const struct hlsl_reg_reservation *reg_reservation) +{ + struct hlsl_ir_var *var; + + if (!(var = hlsl_alloc(ctx, sizeof(*var)))) + return NULL; + + var->name = name; + var->data_type = type; + var->loc = loc; + if (semantic) + var->semantic = *semantic; + var->modifiers = modifiers; + if (reg_reservation) + var->reg_reservation = *reg_reservation; + return var; +} + +struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, + const struct vkd3d_shader_location loc) +{ + struct hlsl_ir_var *var = hlsl_new_var(ctx, hlsl_strdup(ctx, name), type, loc, NULL, 0, NULL); + + if (var) + list_add_tail(&ctx->globals->vars, &var->scope_entry); + return var; +} + +static bool type_is_single_reg(const struct hlsl_type *type) +{ + return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; +} + +struct hlsl_ir_store *hlsl_new_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct hlsl_ir_node *offset, + struct hlsl_ir_node *rhs, unsigned int writemask, struct vkd3d_shader_location loc) +{ + struct hlsl_ir_store *store; + + if (!writemask && type_is_single_reg(rhs->data_type)) + writemask = (1 << rhs->data_type->dimx) - 1; + + if (!(store = hlsl_alloc(ctx, sizeof(*store)))) + return NULL; + + init_node(&store->node, HLSL_IR_STORE, NULL, loc); + store->lhs.var = var; + hlsl_src_from_node(&store->lhs.offset, offset); + hlsl_src_from_node(&store->rhs, rhs); + store->writemask = writemask; + return store; +} + +struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) +{ + return hlsl_new_store(ctx, lhs, NULL, rhs, 0, rhs->loc); +} + +struct hlsl_ir_constant *hlsl_new_int_constant(struct 
hlsl_ctx *ctx, int n, + const struct vkd3d_shader_location loc) +{ + struct hlsl_ir_constant *c; + + if (!(c = hlsl_alloc(ctx, sizeof(*c)))) + return NULL; + init_node(&c->node, HLSL_IR_CONSTANT, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); + c->value[0].i = n; + return c; +} + +struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, + const struct vkd3d_shader_location loc) +{ + struct hlsl_ir_constant *c; + + if (!(c = hlsl_alloc(ctx, sizeof(*c)))) + return NULL; + init_node(&c->node, HLSL_IR_CONSTANT, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + c->value[0].u = n; + return c; +} + +struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg, struct vkd3d_shader_location loc) +{ + struct hlsl_ir_expr *expr; + + if (!(expr = hlsl_alloc(ctx, sizeof(*expr)))) + return NULL; + init_node(&expr->node, HLSL_IR_EXPR, arg->data_type, loc); + expr->op = op; + hlsl_src_from_node(&expr->operands[0], arg); + return &expr->node; +} + +struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2) +{ + struct hlsl_ir_expr *expr; + + assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); + + if (!(expr = hlsl_alloc(ctx, sizeof(*expr)))) + return NULL; + init_node(&expr->node, HLSL_IR_EXPR, arg1->data_type, arg1->loc); + expr->op = op; + hlsl_src_from_node(&expr->operands[0], arg1); + hlsl_src_from_node(&expr->operands[1], arg2); + return &expr->node; +} + +struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc) +{ + struct hlsl_ir_if *iff; + + if (!(iff = hlsl_alloc(ctx, sizeof(*iff)))) + return NULL; + init_node(&iff->node, HLSL_IR_IF, NULL, loc); + hlsl_src_from_node(&iff->condition, condition); + list_init(&iff->then_instrs.instrs); + list_init(&iff->else_instrs.instrs); + return iff; +} + +struct hlsl_ir_load *hlsl_new_load(struct 
hlsl_ctx *ctx, struct hlsl_ir_var *var, struct hlsl_ir_node *offset, + struct hlsl_type *type, const struct vkd3d_shader_location loc) +{ + struct hlsl_ir_load *load; + + if (!(load = hlsl_alloc(ctx, sizeof(*load)))) + return NULL; + init_node(&load->node, HLSL_IR_LOAD, type, loc); + load->src.var = var; + hlsl_src_from_node(&load->src.offset, offset); + return load; +} + +struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, + const struct vkd3d_shader_location loc) +{ + return hlsl_new_load(ctx, var, NULL, var->data_type, loc); +} + +struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, struct hlsl_type *data_type, + enum hlsl_resource_load_type type, struct hlsl_ir_var *resource, struct hlsl_ir_node *resource_offset, + struct hlsl_ir_var *sampler, struct hlsl_ir_node *sampler_offset, struct hlsl_ir_node *coords, + struct hlsl_ir_node *texel_offset, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_resource_load *load; + + if (!(load = hlsl_alloc(ctx, sizeof(*load)))) + return NULL; + init_node(&load->node, HLSL_IR_RESOURCE_LOAD, data_type, *loc); + load->load_type = type; + load->resource.var = resource; + hlsl_src_from_node(&load->resource.offset, resource_offset); + load->sampler.var = sampler; + hlsl_src_from_node(&load->sampler.offset, sampler_offset); + hlsl_src_from_node(&load->coords, coords); + hlsl_src_from_node(&load->texel_offset, texel_offset); + return load; +} + +struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, + struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_swizzle *swizzle; + + if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) + return NULL; + init_node(&swizzle->node, HLSL_IR_SWIZZLE, + hlsl_get_vector_type(ctx, val->data_type->base_type, components), *loc); + hlsl_src_from_node(&swizzle->val, val); + swizzle->swizzle = s; + return swizzle; +} + +struct hlsl_ir_jump *hlsl_new_jump(struct 
hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc) +{ + struct hlsl_ir_jump *jump; + + if (!(jump = hlsl_alloc(ctx, sizeof(*jump)))) + return NULL; + init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); + jump->type = type; + return jump; +} + +struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc) +{ + struct hlsl_ir_loop *loop; + + if (!(loop = hlsl_alloc(ctx, sizeof(*loop)))) + return NULL; + init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); + list_init(&loop->body.instrs); + return loop; +} + +struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, + struct list *parameters, const struct hlsl_semantic *semantic, struct vkd3d_shader_location loc) +{ + struct hlsl_ir_function_decl *decl; + + if (!(decl = hlsl_alloc(ctx, sizeof(*decl)))) + return NULL; + list_init(&decl->body.instrs); + decl->return_type = return_type; + decl->parameters = parameters; + decl->loc = loc; + + if (!hlsl_types_are_equal(return_type, ctx->builtin_types.Void)) + { + struct hlsl_ir_var *return_var; + char name[28]; + + sprintf(name, "", decl); + if (!(return_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name), return_type, loc, semantic, 0, NULL))) + { + vkd3d_free(decl); + return NULL; + } + list_add_tail(&ctx->globals->vars, &return_var->scope_entry); + decl->return_var = return_var; + } + + return decl; +} + +struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, + const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc) +{ + struct hlsl_buffer *buffer; + + if (!(buffer = hlsl_alloc(ctx, sizeof(*buffer)))) + return NULL; + buffer->type = type; + buffer->name = name; + if (reservation) + buffer->reservation = *reservation; + buffer->loc = loc; + list_add_tail(&ctx->buffers, &buffer->entry); + return buffer; +} + +static int compare_hlsl_types_rb(const void *key, const struct rb_entry *entry) +{ + const struct 
hlsl_type *type = RB_ENTRY_VALUE(entry, const struct hlsl_type, scope_entry); + const char *name = key; + + if (name == type->name) + return 0; + + if (!name || !type->name) + { + ERR("hlsl_type without a name in a scope?\n"); + return -1; + } + return strcmp(name, type->name); +} + +void hlsl_push_scope(struct hlsl_ctx *ctx) +{ + struct hlsl_scope *new_scope; + + if (!(new_scope = hlsl_alloc(ctx, sizeof(*new_scope)))) + return; + TRACE("Pushing a new scope.\n"); + list_init(&new_scope->vars); + rb_init(&new_scope->types, compare_hlsl_types_rb); + new_scope->upper = ctx->cur_scope; + ctx->cur_scope = new_scope; + list_add_tail(&ctx->scopes, &new_scope->entry); +} + +void hlsl_pop_scope(struct hlsl_ctx *ctx) +{ + struct hlsl_scope *prev_scope = ctx->cur_scope->upper; + + assert(prev_scope); + TRACE("Popping current scope.\n"); + ctx->cur_scope = prev_scope; +} + +static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hlsl_type *t2) +{ + int r; + + if ((r = vkd3d_u32_compare(t1->type, t2->type))) + { + if (!((t1->type == HLSL_CLASS_SCALAR && t2->type == HLSL_CLASS_VECTOR) + || (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_SCALAR))) + return r; + } + if ((r = vkd3d_u32_compare(t1->base_type, t2->base_type))) + return r; + if (t1->base_type == HLSL_TYPE_SAMPLER || t1->base_type == HLSL_TYPE_TEXTURE) + { + if ((r = vkd3d_u32_compare(t1->sampler_dim, t2->sampler_dim))) + return r; + if (t1->base_type == HLSL_TYPE_TEXTURE && t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC + && (r = compare_param_hlsl_types(t1->e.resource_format, t2->e.resource_format))) + return r; + } + if ((r = vkd3d_u32_compare(t1->dimx, t2->dimx))) + return r; + if ((r = vkd3d_u32_compare(t1->dimy, t2->dimy))) + return r; + if (t1->type == HLSL_CLASS_STRUCT) + { + struct list *t1cur, *t2cur; + struct hlsl_struct_field *t1field, *t2field; + + t1cur = list_head(t1->e.elements); + t2cur = list_head(t2->e.elements); + while (t1cur && t2cur) + { + t1field = LIST_ENTRY(t1cur, 
struct hlsl_struct_field, entry); + t2field = LIST_ENTRY(t2cur, struct hlsl_struct_field, entry); + if ((r = compare_param_hlsl_types(t1field->type, t2field->type))) + return r; + if ((r = strcmp(t1field->name, t2field->name))) + return r; + t1cur = list_next(t1->e.elements, t1cur); + t2cur = list_next(t2->e.elements, t2cur); + } + if (t1cur != t2cur) + return t1cur ? 1 : -1; + return 0; + } + if (t1->type == HLSL_CLASS_ARRAY) + { + if ((r = vkd3d_u32_compare(t1->e.array.elements_count, t2->e.array.elements_count))) + return r; + return compare_param_hlsl_types(t1->e.array.type, t2->e.array.type); + } + + return 0; +} + +static int compare_function_decl_rb(const void *key, const struct rb_entry *entry) +{ + const struct list *params = key; + const struct hlsl_ir_function_decl *decl = RB_ENTRY_VALUE(entry, const struct hlsl_ir_function_decl, entry); + int decl_params_count = decl->parameters ? list_count(decl->parameters) : 0; + int params_count = params ? list_count(params) : 0; + struct list *p1cur, *p2cur; + int r; + + if ((r = vkd3d_u32_compare(params_count, decl_params_count))) + return r; + + p1cur = params ? list_head(params) : NULL; + p2cur = decl->parameters ? 
list_head(decl->parameters) : NULL; + while (p1cur && p2cur) + { + struct hlsl_ir_var *p1, *p2; + p1 = LIST_ENTRY(p1cur, struct hlsl_ir_var, param_entry); + p2 = LIST_ENTRY(p2cur, struct hlsl_ir_var, param_entry); + if ((r = compare_param_hlsl_types(p1->data_type, p2->data_type))) + return r; + p1cur = list_next(params, p1cur); + p2cur = list_next(decl->parameters, p2cur); + } + return 0; +} + +struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) +{ + struct vkd3d_string_buffer *string; + + static const char *const base_types[] = + { + [HLSL_TYPE_FLOAT] = "float", + [HLSL_TYPE_HALF] = "half", + [HLSL_TYPE_DOUBLE] = "double", + [HLSL_TYPE_INT] = "int", + [HLSL_TYPE_UINT] = "uint", + [HLSL_TYPE_BOOL] = "bool", + }; + + if (!(string = hlsl_get_string_buffer(ctx))) + return NULL; + + if (type->name) + { + vkd3d_string_buffer_printf(string, "%s", type->name); + return string; + } + + switch (type->type) + { + case HLSL_CLASS_SCALAR: + assert(type->base_type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "%s", base_types[type->base_type]); + return string; + + case HLSL_CLASS_VECTOR: + assert(type->base_type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "%s%u", base_types[type->base_type], type->dimx); + return string; + + case HLSL_CLASS_MATRIX: + assert(type->base_type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->base_type], type->dimy, type->dimx); + return string; + + case HLSL_CLASS_ARRAY: + { + struct vkd3d_string_buffer *inner_string; + const struct hlsl_type *t; + + for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) + ; + + if ((inner_string = hlsl_type_to_string(ctx, t))) + { + vkd3d_string_buffer_printf(string, "%s", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } + + for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) + vkd3d_string_buffer_printf(string, "[%u]", 
t->e.array.elements_count); + return string; + } + + case HLSL_CLASS_STRUCT: + vkd3d_string_buffer_printf(string, "<anonymous struct>"); + return string; + + case HLSL_CLASS_OBJECT: + { + static const char *const dimensions[] = + { + [HLSL_SAMPLER_DIM_1D] = "1D", + [HLSL_SAMPLER_DIM_2D] = "2D", + [HLSL_SAMPLER_DIM_3D] = "3D", + [HLSL_SAMPLER_DIM_CUBE] = "Cube", + [HLSL_SAMPLER_DIM_1DARRAY] = "1DArray", + [HLSL_SAMPLER_DIM_2DARRAY] = "2DArray", + [HLSL_SAMPLER_DIM_2DMS] = "2DMS", + [HLSL_SAMPLER_DIM_2DMSARRAY] = "2DMSArray", + [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", + }; + + switch (type->base_type) + { + case HLSL_TYPE_TEXTURE: + if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + { + vkd3d_string_buffer_printf(string, "Texture"); + return string; + } + + assert(type->sampler_dim < ARRAY_SIZE(dimensions)); + assert(type->e.resource_format->base_type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "Texture%s<%s%u>", dimensions[type->sampler_dim], + base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); + return string; + + default: + vkd3d_string_buffer_printf(string, "<unexpected object type>"); + return string; + } + } + + default: + vkd3d_string_buffer_printf(string, "<unexpected type>"); + return string; + } +} + +const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) +{ + struct vkd3d_string_buffer *string; + const char *ret; + + if (!(string = hlsl_type_to_string(ctx, type))) + return NULL; + ret = vkd3d_dbg_sprintf("%s", string->buffer); + hlsl_release_string_buffer(ctx, string); + return ret; +} + +struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers) +{ + struct vkd3d_string_buffer *string; + + if (!(string = hlsl_get_string_buffer(ctx))) + return NULL; + + if (modifiers & HLSL_STORAGE_EXTERN) + vkd3d_string_buffer_printf(string, "extern "); + if (modifiers & HLSL_STORAGE_NOINTERPOLATION) + vkd3d_string_buffer_printf(string, "nointerpolation "); + if (modifiers & HLSL_MODIFIER_PRECISE) + 
vkd3d_string_buffer_printf(string, "precise "); + if (modifiers & HLSL_STORAGE_SHARED) + vkd3d_string_buffer_printf(string, "shared "); + if (modifiers & HLSL_STORAGE_GROUPSHARED) + vkd3d_string_buffer_printf(string, "groupshared "); + if (modifiers & HLSL_STORAGE_STATIC) + vkd3d_string_buffer_printf(string, "static "); + if (modifiers & HLSL_STORAGE_UNIFORM) + vkd3d_string_buffer_printf(string, "uniform "); + if (modifiers & HLSL_STORAGE_VOLATILE) + vkd3d_string_buffer_printf(string, "volatile "); + if (modifiers & HLSL_MODIFIER_CONST) + vkd3d_string_buffer_printf(string, "const "); + if (modifiers & HLSL_MODIFIER_ROW_MAJOR) + vkd3d_string_buffer_printf(string, "row_major "); + if (modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + vkd3d_string_buffer_printf(string, "column_major "); + if ((modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT)) == (HLSL_STORAGE_IN | HLSL_STORAGE_OUT)) + vkd3d_string_buffer_printf(string, "inout "); + else if (modifiers & HLSL_STORAGE_IN) + vkd3d_string_buffer_printf(string, "in "); + else if (modifiers & HLSL_STORAGE_OUT) + vkd3d_string_buffer_printf(string, "out "); + + if (string->content_size) + string->buffer[--string->content_size] = 0; + + return string; +} + +const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) +{ + static const char * const names[] = + { + "HLSL_IR_CONSTANT", + "HLSL_IR_EXPR", + "HLSL_IR_IF", + "HLSL_IR_LOAD", + "HLSL_IR_LOOP", + "HLSL_IR_JUMP", + "HLSL_IR_RESOURCE_LOAD", + "HLSL_IR_STORE", + "HLSL_IR_SWIZZLE", + }; + + if (type >= ARRAY_SIZE(names)) + return "Unexpected node type"; + return names[type]; +} + +static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr); + +static void dump_instr_list(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct list *list) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, list, struct hlsl_ir_node, entry) + { + dump_instr(ctx, buffer, instr); + vkd3d_string_buffer_printf(buffer, 
"\n"); + } +} + +static void dump_src(struct vkd3d_string_buffer *buffer, const struct hlsl_src *src) +{ + if (src->node->index) + vkd3d_string_buffer_printf(buffer, "@%u", src->node->index); + else + vkd3d_string_buffer_printf(buffer, "%p", src->node); +} + +static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_var *var) +{ + if (var->modifiers) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, var->modifiers))) + vkd3d_string_buffer_printf(buffer, "%s ", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + vkd3d_string_buffer_printf(buffer, "%s %s", debug_hlsl_type(ctx, var->data_type), var->name); + if (var->semantic.name) + vkd3d_string_buffer_printf(buffer, " : %s%u", var->semantic.name, var->semantic.index); +} + +static void dump_deref(struct vkd3d_string_buffer *buffer, const struct hlsl_deref *deref) +{ + if (deref->var) + { + vkd3d_string_buffer_printf(buffer, "%s", deref->var->name); + if (deref->offset.node) + { + vkd3d_string_buffer_printf(buffer, "["); + dump_src(buffer, &deref->offset); + vkd3d_string_buffer_printf(buffer, "]"); + } + } + else + { + vkd3d_string_buffer_printf(buffer, "(nil)"); + } +} + +const char *debug_hlsl_writemask(unsigned int writemask) +{ + static const char components[] = {'x', 'y', 'z', 'w'}; + char string[5]; + unsigned int i = 0, pos = 0; + + assert(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); + + while (writemask) + { + if (writemask & 1) + string[pos++] = components[i]; + writemask >>= 1; + i++; + } + string[pos] = '\0'; + return vkd3d_dbg_sprintf(".%s", string); +} + +const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int size) +{ + static const char components[] = {'x', 'y', 'z', 'w'}; + char string[5]; + unsigned int i; + + assert(size <= ARRAY_SIZE(components)); + for (i = 0; i < size; ++i) + string[i] = components[(swizzle >> i * 2) & 3]; + string[size] = 0; + return vkd3d_dbg_sprintf(".%s", string); +} + 
+static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_constant *constant) +{ + struct hlsl_type *type = constant->node.data_type; + unsigned int x; + + if (type->dimx != 1) + vkd3d_string_buffer_printf(buffer, "{"); + for (x = 0; x < type->dimx; ++x) + { + const union hlsl_constant_value *value = &constant->value[x]; + + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + vkd3d_string_buffer_printf(buffer, "%s ", value->b ? "true" : "false"); + break; + + case HLSL_TYPE_DOUBLE: + vkd3d_string_buffer_printf(buffer, "%.16e ", value->d); + break; + + case HLSL_TYPE_FLOAT: + vkd3d_string_buffer_printf(buffer, "%.8e ", value->f); + break; + + case HLSL_TYPE_INT: + vkd3d_string_buffer_printf(buffer, "%d ", value->i); + break; + + case HLSL_TYPE_UINT: + vkd3d_string_buffer_printf(buffer, "%u ", value->u); + break; + + default: + assert(0); + } + } + if (type->dimx != 1) + vkd3d_string_buffer_printf(buffer, "}"); +} + +const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) +{ + static const char *const op_names[] = + { + [HLSL_OP1_ABS] = "abs", + [HLSL_OP1_BIT_NOT] = "~", + [HLSL_OP1_CAST] = "cast", + [HLSL_OP1_COS] = "cos", + [HLSL_OP1_COS_REDUCED] = "cos_reduced", + [HLSL_OP1_DSX] = "dsx", + [HLSL_OP1_DSY] = "dsy", + [HLSL_OP1_EXP2] = "exp2", + [HLSL_OP1_FRACT] = "fract", + [HLSL_OP1_LOG2] = "log2", + [HLSL_OP1_LOGIC_NOT] = "!", + [HLSL_OP1_NEG] = "-", + [HLSL_OP1_NRM] = "nrm", + [HLSL_OP1_RCP] = "rcp", + [HLSL_OP1_ROUND] = "round", + [HLSL_OP1_RSQ] = "rsq", + [HLSL_OP1_SAT] = "sat", + [HLSL_OP1_SIGN] = "sign", + [HLSL_OP1_SIN] = "sin", + [HLSL_OP1_SIN_REDUCED] = "sin_reduced", + [HLSL_OP1_SQRT] = "sqrt", + + [HLSL_OP2_ADD] = "+", + [HLSL_OP2_BIT_AND] = "&", + [HLSL_OP2_BIT_OR] = "|", + [HLSL_OP2_BIT_XOR] = "^", + [HLSL_OP2_CRS] = "crs", + [HLSL_OP2_DIV] = "/", + [HLSL_OP2_DOT] = "dot", + [HLSL_OP2_EQUAL] = "==", + [HLSL_OP2_GEQUAL] = ">=", + [HLSL_OP2_LESS] = "<", + [HLSL_OP2_LOGIC_AND] = "&&", + [HLSL_OP2_LOGIC_OR] = "||", + 
[HLSL_OP2_LSHIFT] = "<<", + [HLSL_OP2_MAX] = "max", + [HLSL_OP2_MIN] = "min", + [HLSL_OP2_MOD] = "%", + [HLSL_OP2_MUL] = "*", + [HLSL_OP2_NEQUAL] = "!=", + [HLSL_OP2_RSHIFT] = ">>", + + [HLSL_OP3_LERP] = "lerp", + }; + + return op_names[op]; +} + +static void dump_ir_expr(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_expr *expr) +{ + unsigned int i; + + vkd3d_string_buffer_printf(buffer, "%s (", debug_hlsl_expr_op(expr->op)); + for (i = 0; i < HLSL_MAX_OPERANDS && expr->operands[i].node; ++i) + { + dump_src(buffer, &expr->operands[i]); + vkd3d_string_buffer_printf(buffer, " "); + } + vkd3d_string_buffer_printf(buffer, ")"); +} + +static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_if *if_node) +{ + vkd3d_string_buffer_printf(buffer, "if ("); + dump_src(buffer, &if_node->condition); + vkd3d_string_buffer_printf(buffer, ") {\n"); + dump_instr_list(ctx, buffer, &if_node->then_instrs.instrs); + vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); + dump_instr_list(ctx, buffer, &if_node->else_instrs.instrs); + vkd3d_string_buffer_printf(buffer, " %10s }", ""); +} + +static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_jump *jump) +{ + switch (jump->type) + { + case HLSL_IR_JUMP_BREAK: + vkd3d_string_buffer_printf(buffer, "break"); + break; + + case HLSL_IR_JUMP_CONTINUE: + vkd3d_string_buffer_printf(buffer, "continue"); + break; + + case HLSL_IR_JUMP_DISCARD: + vkd3d_string_buffer_printf(buffer, "discard"); + break; + + case HLSL_IR_JUMP_RETURN: + vkd3d_string_buffer_printf(buffer, "return"); + break; + } +} + +static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) +{ + vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); + dump_instr_list(ctx, buffer, &loop->body.instrs); + vkd3d_string_buffer_printf(buffer, " %10s }", ""); +} + +static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const struct 
hlsl_ir_resource_load *load) +{ + static const char *const type_names[] = + { + [HLSL_RESOURCE_LOAD] = "load_resource", + [HLSL_RESOURCE_SAMPLE] = "sample", + [HLSL_RESOURCE_GATHER_RED] = "gather_red", + [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", + [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", + [HLSL_RESOURCE_GATHER_ALPHA] = "gather_alpha", + }; + + assert(load->load_type < ARRAY_SIZE(type_names)); + vkd3d_string_buffer_printf(buffer, "%s(resource = ", type_names[load->load_type]); + dump_deref(buffer, &load->resource); + vkd3d_string_buffer_printf(buffer, ", sampler = "); + dump_deref(buffer, &load->sampler); + vkd3d_string_buffer_printf(buffer, ", coords = "); + dump_src(buffer, &load->coords); + if (load->texel_offset.node) + { + vkd3d_string_buffer_printf(buffer, ", offset = "); + dump_src(buffer, &load->texel_offset); + } + vkd3d_string_buffer_printf(buffer, ")"); +} + +static void dump_ir_store(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_store *store) +{ + vkd3d_string_buffer_printf(buffer, "= ("); + dump_deref(buffer, &store->lhs); + if (store->writemask != VKD3DSP_WRITEMASK_ALL) + vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_writemask(store->writemask)); + vkd3d_string_buffer_printf(buffer, " "); + dump_src(buffer, &store->rhs); + vkd3d_string_buffer_printf(buffer, ")"); +} + +static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) +{ + unsigned int i; + + dump_src(buffer, &swizzle->val); + if (swizzle->val.node->data_type->dimy > 1) + { + vkd3d_string_buffer_printf(buffer, "."); + for (i = 0; i < swizzle->node.data_type->dimx; ++i) + vkd3d_string_buffer_printf(buffer, "_m%u%u", (swizzle->swizzle >> i * 8) & 0xf, (swizzle->swizzle >> (i * 8 + 4)) & 0xf); + } + else + { + vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->swizzle, swizzle->node.data_type->dimx)); + } +} + +static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct 
hlsl_ir_node *instr) +{ + if (instr->index) + vkd3d_string_buffer_printf(buffer, "%4u: ", instr->index); + else + vkd3d_string_buffer_printf(buffer, "%p: ", instr); + + vkd3d_string_buffer_printf(buffer, "%10s | ", instr->data_type ? debug_hlsl_type(ctx, instr->data_type) : ""); + + switch (instr->type) + { + case HLSL_IR_CONSTANT: + dump_ir_constant(buffer, hlsl_ir_constant(instr)); + break; + + case HLSL_IR_EXPR: + dump_ir_expr(buffer, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: + dump_ir_if(ctx, buffer, hlsl_ir_if(instr)); + break; + + case HLSL_IR_JUMP: + dump_ir_jump(buffer, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_LOAD: + dump_deref(buffer, &hlsl_ir_load(instr)->src); + break; + + case HLSL_IR_LOOP: + dump_ir_loop(ctx, buffer, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + dump_ir_resource_load(buffer, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_STORE: + dump_ir_store(buffer, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: + dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); + break; + } +} + +void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func) +{ + struct vkd3d_string_buffer buffer; + struct hlsl_ir_var *param; + + vkd3d_string_buffer_init(&buffer); + vkd3d_string_buffer_printf(&buffer, "Dumping function %s.\n", func->func->name); + vkd3d_string_buffer_printf(&buffer, "Function parameters:\n"); + LIST_FOR_EACH_ENTRY(param, func->parameters, struct hlsl_ir_var, param_entry) + { + dump_ir_var(ctx, &buffer, param); + vkd3d_string_buffer_printf(&buffer, "\n"); + } + if (func->has_body) + dump_instr_list(ctx, &buffer, &func->body.instrs); + + vkd3d_string_buffer_trace(&buffer); + vkd3d_string_buffer_cleanup(&buffer); +} + +void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) +{ + struct hlsl_src *src, *next; + + LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) + { + hlsl_src_remove(src); + hlsl_src_from_node(src, new); + } + 
list_remove(&old->entry); + hlsl_free_instr(old); +} + + +void hlsl_free_type(struct hlsl_type *type) +{ + struct hlsl_struct_field *field, *next_field; + + vkd3d_free((void *)type->name); + if (type->type == HLSL_CLASS_STRUCT) + { + LIST_FOR_EACH_ENTRY_SAFE(field, next_field, type->e.elements, struct hlsl_struct_field, entry) + { + vkd3d_free((void *)field->name); + vkd3d_free((void *)field->semantic.name); + vkd3d_free(field); + } + } + vkd3d_free(type); +} + +void hlsl_free_instr_list(struct list *list) +{ + struct hlsl_ir_node *node, *next_node; + + if (!list) + return; + /* Iterate in reverse, to avoid use-after-free when unlinking sources from + * the "uses" list. */ + LIST_FOR_EACH_ENTRY_SAFE_REV(node, next_node, list, struct hlsl_ir_node, entry) + hlsl_free_instr(node); +} + +static void free_ir_constant(struct hlsl_ir_constant *constant) +{ + vkd3d_free(constant); +} + +static void free_ir_expr(struct hlsl_ir_expr *expr) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) + hlsl_src_remove(&expr->operands[i]); + vkd3d_free(expr); +} + +static void free_ir_if(struct hlsl_ir_if *if_node) +{ + hlsl_free_instr_list(&if_node->then_instrs.instrs); + hlsl_free_instr_list(&if_node->else_instrs.instrs); + hlsl_src_remove(&if_node->condition); + vkd3d_free(if_node); +} + +static void free_ir_jump(struct hlsl_ir_jump *jump) +{ + vkd3d_free(jump); +} + +static void free_ir_load(struct hlsl_ir_load *load) +{ + hlsl_src_remove(&load->src.offset); + vkd3d_free(load); +} + +static void free_ir_loop(struct hlsl_ir_loop *loop) +{ + hlsl_free_instr_list(&loop->body.instrs); + vkd3d_free(loop); +} + +static void free_ir_resource_load(struct hlsl_ir_resource_load *load) +{ + hlsl_src_remove(&load->coords); + hlsl_src_remove(&load->sampler.offset); + hlsl_src_remove(&load->resource.offset); + hlsl_src_remove(&load->texel_offset); + vkd3d_free(load); +} + +static void free_ir_store(struct hlsl_ir_store *store) +{ + hlsl_src_remove(&store->rhs); + 
hlsl_src_remove(&store->lhs.offset); + vkd3d_free(store); +} + +static void free_ir_swizzle(struct hlsl_ir_swizzle *swizzle) +{ + hlsl_src_remove(&swizzle->val); + vkd3d_free(swizzle); +} + +void hlsl_free_instr(struct hlsl_ir_node *node) +{ + assert(list_empty(&node->uses)); + + switch (node->type) + { + case HLSL_IR_CONSTANT: + free_ir_constant(hlsl_ir_constant(node)); + break; + + case HLSL_IR_EXPR: + free_ir_expr(hlsl_ir_expr(node)); + break; + + case HLSL_IR_IF: + free_ir_if(hlsl_ir_if(node)); + break; + + case HLSL_IR_JUMP: + free_ir_jump(hlsl_ir_jump(node)); + break; + + case HLSL_IR_LOAD: + free_ir_load(hlsl_ir_load(node)); + break; + + case HLSL_IR_LOOP: + free_ir_loop(hlsl_ir_loop(node)); + break; + + case HLSL_IR_RESOURCE_LOAD: + free_ir_resource_load(hlsl_ir_resource_load(node)); + break; + + case HLSL_IR_STORE: + free_ir_store(hlsl_ir_store(node)); + break; + + case HLSL_IR_SWIZZLE: + free_ir_swizzle(hlsl_ir_swizzle(node)); + break; + } +} + +static void free_function_decl(struct hlsl_ir_function_decl *decl) +{ + vkd3d_free(decl->parameters); + hlsl_free_instr_list(&decl->body.instrs); + vkd3d_free(decl); +} + +static void free_function_decl_rb(struct rb_entry *entry, void *context) +{ + free_function_decl(RB_ENTRY_VALUE(entry, struct hlsl_ir_function_decl, entry)); +} + +static void free_function(struct hlsl_ir_function *func) +{ + rb_destroy(&func->overloads, free_function_decl_rb, NULL); + vkd3d_free((void *)func->name); + vkd3d_free(func); +} + +static void free_function_rb(struct rb_entry *entry, void *context) +{ + free_function(RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry)); +} + +void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl, bool intrinsic) +{ + struct hlsl_ir_function *func; + struct rb_entry *func_entry, *old_entry; + + func_entry = rb_get(&ctx->functions, name); + if (func_entry) + { + func = RB_ENTRY_VALUE(func_entry, struct hlsl_ir_function, entry); + if (intrinsic != func->intrinsic) 
+ { + if (intrinsic) + { + ERR("Redeclaring a user defined function as an intrinsic.\n"); + return; + } + func->intrinsic = intrinsic; + rb_destroy(&func->overloads, free_function_decl_rb, NULL); + rb_init(&func->overloads, compare_function_decl_rb); + } + decl->func = func; + if ((old_entry = rb_get(&func->overloads, decl->parameters))) + { + struct hlsl_ir_function_decl *old_decl = + RB_ENTRY_VALUE(old_entry, struct hlsl_ir_function_decl, entry); + + if (!decl->has_body) + { + free_function_decl(decl); + vkd3d_free(name); + return; + } + rb_remove(&func->overloads, old_entry); + free_function_decl(old_decl); + } + rb_put(&func->overloads, decl->parameters, &decl->entry); + vkd3d_free(name); + return; + } + func = hlsl_alloc(ctx, sizeof(*func)); + func->name = name; + rb_init(&func->overloads, compare_function_decl_rb); + decl->func = func; + rb_put(&func->overloads, decl->parameters, &decl->entry); + func->intrinsic = intrinsic; + rb_put(&ctx->functions, func->name, &func->entry); +} + +unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask) +{ + unsigned int i, ret = 0; + + /* Leave replicate swizzles alone; some instructions need them. 
*/ + if (swizzle == HLSL_SWIZZLE(X, X, X, X) + || swizzle == HLSL_SWIZZLE(Y, Y, Y, Y) + || swizzle == HLSL_SWIZZLE(Z, Z, Z, Z) + || swizzle == HLSL_SWIZZLE(W, W, W, W)) + return swizzle; + + for (i = 0; i < 4; ++i) + { + if (writemask & (1 << i)) + { + ret |= (swizzle & 3) << (i * 2); + swizzle >>= 2; + } + } + return ret; +} + +unsigned int hlsl_swizzle_from_writemask(unsigned int writemask) +{ + static const unsigned int swizzles[16] = + { + 0, + HLSL_SWIZZLE(X, X, X, X), + HLSL_SWIZZLE(Y, Y, Y, Y), + HLSL_SWIZZLE(X, Y, X, X), + HLSL_SWIZZLE(Z, Z, Z, Z), + HLSL_SWIZZLE(X, Z, X, X), + HLSL_SWIZZLE(Y, Z, X, X), + HLSL_SWIZZLE(X, Y, Z, X), + HLSL_SWIZZLE(W, W, W, W), + HLSL_SWIZZLE(X, W, X, X), + HLSL_SWIZZLE(Y, W, X, X), + HLSL_SWIZZLE(X, Y, W, X), + HLSL_SWIZZLE(Z, W, X, X), + HLSL_SWIZZLE(X, Z, W, X), + HLSL_SWIZZLE(Y, Z, W, X), + HLSL_SWIZZLE(X, Y, Z, W), + }; + + return swizzles[writemask & 0xf]; +} + +unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second) +{ + unsigned int ret = 0, i, j = 0; + + for (i = 0; i < 4; ++i) + { + if (first & (1 << i)) + { + if (second & (1 << j++)) + ret |= (1 << i); + } + } + + return ret; +} + +unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim) +{ + unsigned int ret = 0, i; + for (i = 0; i < dim; ++i) + { + unsigned int s = (second >> (i * 2)) & 3; + ret |= ((first >> (s * 2)) & 3) << (i * 2); + } + return ret; +} + +static const struct hlsl_profile_info *get_target_info(const char *target) +{ + unsigned int i; + + static const struct hlsl_profile_info profiles[] = + { + {"cs_4_0", VKD3D_SHADER_TYPE_COMPUTE, 4, 0, 0, 0, false}, + {"cs_4_1", VKD3D_SHADER_TYPE_COMPUTE, 4, 1, 0, 0, false}, + {"cs_5_0", VKD3D_SHADER_TYPE_COMPUTE, 5, 0, 0, 0, false}, + {"ds_5_0", VKD3D_SHADER_TYPE_DOMAIN, 5, 0, 0, 0, false}, + {"fx_2_0", VKD3D_SHADER_TYPE_EFFECT, 2, 0, 0, 0, false}, + {"fx_4_0", VKD3D_SHADER_TYPE_EFFECT, 4, 0, 0, 0, false}, + {"fx_4_1", VKD3D_SHADER_TYPE_EFFECT, 4, 1, 
0, 0, false}, + {"fx_5_0", VKD3D_SHADER_TYPE_EFFECT, 5, 0, 0, 0, false}, + {"gs_4_0", VKD3D_SHADER_TYPE_GEOMETRY, 4, 0, 0, 0, false}, + {"gs_4_1", VKD3D_SHADER_TYPE_GEOMETRY, 4, 1, 0, 0, false}, + {"gs_5_0", VKD3D_SHADER_TYPE_GEOMETRY, 5, 0, 0, 0, false}, + {"hs_5_0", VKD3D_SHADER_TYPE_HULL, 5, 0, 0, 0, false}, + {"ps.1.0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false}, + {"ps.1.1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false}, + {"ps.1.2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false}, + {"ps.1.3", VKD3D_SHADER_TYPE_PIXEL, 1, 3, 0, 0, false}, + {"ps.1.4", VKD3D_SHADER_TYPE_PIXEL, 1, 4, 0, 0, false}, + {"ps.2.0", VKD3D_SHADER_TYPE_PIXEL, 2, 0, 0, 0, false}, + {"ps.2.a", VKD3D_SHADER_TYPE_PIXEL, 2, 1, 0, 0, false}, + {"ps.2.b", VKD3D_SHADER_TYPE_PIXEL, 2, 2, 0, 0, false}, + {"ps.2.sw", VKD3D_SHADER_TYPE_PIXEL, 2, 0, 0, 0, true}, + {"ps.3.0", VKD3D_SHADER_TYPE_PIXEL, 3, 0, 0, 0, false}, + {"ps_1_0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false}, + {"ps_1_1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false}, + {"ps_1_2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false}, + {"ps_1_3", VKD3D_SHADER_TYPE_PIXEL, 1, 3, 0, 0, false}, + {"ps_1_4", VKD3D_SHADER_TYPE_PIXEL, 1, 4, 0, 0, false}, + {"ps_2_0", VKD3D_SHADER_TYPE_PIXEL, 2, 0, 0, 0, false}, + {"ps_2_a", VKD3D_SHADER_TYPE_PIXEL, 2, 1, 0, 0, false}, + {"ps_2_b", VKD3D_SHADER_TYPE_PIXEL, 2, 2, 0, 0, false}, + {"ps_2_sw", VKD3D_SHADER_TYPE_PIXEL, 2, 0, 0, 0, true}, + {"ps_3_0", VKD3D_SHADER_TYPE_PIXEL, 3, 0, 0, 0, false}, + {"ps_3_sw", VKD3D_SHADER_TYPE_PIXEL, 3, 0, 0, 0, true}, + {"ps_4_0", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 0, 0, false}, + {"ps_4_0_level_9_0", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 0, false}, + {"ps_4_0_level_9_1", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 1, false}, + {"ps_4_0_level_9_3", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 3, false}, + {"ps_4_1", VKD3D_SHADER_TYPE_PIXEL, 4, 1, 0, 0, false}, + {"ps_5_0", VKD3D_SHADER_TYPE_PIXEL, 5, 0, 0, 0, false}, + {"tx_1_0", VKD3D_SHADER_TYPE_TEXTURE, 1, 0, 0, 0, false}, + {"vs.1.0", 
VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false}, + {"vs.1.1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false}, + {"vs.2.0", VKD3D_SHADER_TYPE_VERTEX, 2, 0, 0, 0, false}, + {"vs.2.a", VKD3D_SHADER_TYPE_VERTEX, 2, 1, 0, 0, false}, + {"vs.2.sw", VKD3D_SHADER_TYPE_VERTEX, 2, 0, 0, 0, true}, + {"vs.3.0", VKD3D_SHADER_TYPE_VERTEX, 3, 0, 0, 0, false}, + {"vs.3.sw", VKD3D_SHADER_TYPE_VERTEX, 3, 0, 0, 0, true}, + {"vs_1_0", VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false}, + {"vs_1_1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false}, + {"vs_2_0", VKD3D_SHADER_TYPE_VERTEX, 2, 0, 0, 0, false}, + {"vs_2_a", VKD3D_SHADER_TYPE_VERTEX, 2, 1, 0, 0, false}, + {"vs_2_sw", VKD3D_SHADER_TYPE_VERTEX, 2, 0, 0, 0, true}, + {"vs_3_0", VKD3D_SHADER_TYPE_VERTEX, 3, 0, 0, 0, false}, + {"vs_3_sw", VKD3D_SHADER_TYPE_VERTEX, 3, 0, 0, 0, true}, + {"vs_4_0", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 0, 0, false}, + {"vs_4_0_level_9_0", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 0, false}, + {"vs_4_0_level_9_1", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 1, false}, + {"vs_4_0_level_9_3", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 3, false}, + {"vs_4_1", VKD3D_SHADER_TYPE_VERTEX, 4, 1, 0, 0, false}, + {"vs_5_0", VKD3D_SHADER_TYPE_VERTEX, 5, 0, 0, 0, false}, + }; + + for (i = 0; i < ARRAY_SIZE(profiles); ++i) + { + if (!strcmp(target, profiles[i].name)) + return &profiles[i]; + } + + return NULL; +} + +static int compare_function_rb(const void *key, const struct rb_entry *entry) +{ + const char *name = key; + const struct hlsl_ir_function *func = RB_ENTRY_VALUE(entry, const struct hlsl_ir_function,entry); + + return strcmp(name, func->name); +} + +static void declare_predefined_types(struct hlsl_ctx *ctx) +{ + unsigned int x, y, bt, i; + struct hlsl_type *type; + + static const char * const names[] = + { + "float", + "half", + "double", + "int", + "uint", + "bool", + }; + char name[10]; + + static const char *const sampler_names[] = + { + [HLSL_SAMPLER_DIM_GENERIC] = "sampler", + [HLSL_SAMPLER_DIM_1D] = "sampler1D", + [HLSL_SAMPLER_DIM_2D] = 
"sampler2D", + [HLSL_SAMPLER_DIM_3D] = "sampler3D", + [HLSL_SAMPLER_DIM_CUBE] = "samplerCUBE", + }; + + static const struct + { + char name[13]; + enum hlsl_type_class class; + enum hlsl_base_type base_type; + unsigned int dimx, dimy; + } + effect_types[] = + { + {"DWORD", HLSL_CLASS_SCALAR, HLSL_TYPE_INT, 1, 1}, + {"FLOAT", HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1}, + {"VECTOR", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, + {"MATRIX", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, + {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, + {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, + {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, + {"VERTEXSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, + }; + + for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) + { + for (y = 1; y <= 4; ++y) + { + for (x = 1; x <= 4; ++x) + { + sprintf(name, "%s%ux%u", names[bt], y, x); + type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); + hlsl_scope_add_type(ctx->globals, type); + ctx->builtin_types.matrix[bt][x - 1][y - 1] = type; + + if (y == 1) + { + sprintf(name, "%s%u", names[bt], x); + type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); + hlsl_scope_add_type(ctx->globals, type); + ctx->builtin_types.vector[bt][x - 1] = type; + + if (x == 1) + { + sprintf(name, "%s", names[bt]); + type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); + hlsl_scope_add_type(ctx->globals, type); + ctx->builtin_types.scalar[bt] = type; + } + } + } + } + } + + for (bt = 0; bt <= HLSL_SAMPLER_DIM_LAST_SAMPLER; ++bt) + { + type = hlsl_new_type(ctx, sampler_names[bt], HLSL_CLASS_OBJECT, HLSL_TYPE_SAMPLER, 1, 1); + type->sampler_dim = bt; + ctx->builtin_types.sampler[bt] = type; + } + + ctx->builtin_types.Void = hlsl_new_type(ctx, "void", HLSL_CLASS_OBJECT, HLSL_TYPE_VOID, 1, 1); + + for (i = 0; i < ARRAY_SIZE(effect_types); ++i) + { + type = hlsl_new_type(ctx, effect_types[i].name, effect_types[i].class, + effect_types[i].base_type, 
effect_types[i].dimx, effect_types[i].dimy); + hlsl_scope_add_type(ctx->globals, type); + } +} + +static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, + const struct hlsl_profile_info *profile, struct vkd3d_shader_message_context *message_context) +{ + memset(ctx, 0, sizeof(*ctx)); + + ctx->profile = profile; + + ctx->message_context = message_context; + + if (!(ctx->source_files = hlsl_alloc(ctx, sizeof(*ctx->source_files)))) + return false; + if (!(ctx->source_files[0] = hlsl_strdup(ctx, source_name ? source_name : "<anonymous>"))) + { + vkd3d_free(ctx->source_files); + return false; + } + ctx->source_files_count = 1; + ctx->location.source_name = ctx->source_files[0]; + ctx->location.line = ctx->location.column = 1; + vkd3d_string_buffer_cache_init(&ctx->string_buffers); + + ctx->matrix_majority = HLSL_COLUMN_MAJOR; + + list_init(&ctx->scopes); + hlsl_push_scope(ctx); + ctx->globals = ctx->cur_scope; + + list_init(&ctx->types); + declare_predefined_types(ctx); + + rb_init(&ctx->functions, compare_function_rb); + + list_init(&ctx->static_initializers); + list_init(&ctx->extern_vars); + + list_init(&ctx->buffers); + + if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, + hlsl_strdup(ctx, "$Globals"), NULL, ctx->location))) + return false; + if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, + hlsl_strdup(ctx, "$Params"), NULL, ctx->location))) + return false; + ctx->cur_buffer = ctx->globals_buffer; + + return true; +} + +static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) +{ + struct hlsl_buffer *buffer, *next_buffer; + struct hlsl_scope *scope, *next_scope; + struct hlsl_ir_var *var, *next_var; + struct hlsl_type *type, *next_type; + unsigned int i; + + for (i = 0; i < ctx->source_files_count; ++i) + vkd3d_free((void *)ctx->source_files[i]); + vkd3d_free(ctx->source_files); + vkd3d_string_buffer_cache_cleanup(&ctx->string_buffers); + + rb_destroy(&ctx->functions, free_function_rb, NULL); + + 
LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) + hlsl_free_var(var); + rb_destroy(&scope->types, NULL, NULL); + vkd3d_free(scope); + } + + LIST_FOR_EACH_ENTRY_SAFE(type, next_type, &ctx->types, struct hlsl_type, entry) + hlsl_free_type(type); + + LIST_FOR_EACH_ENTRY_SAFE(buffer, next_buffer, &ctx->buffers, struct hlsl_buffer, entry) + { + vkd3d_free((void *)buffer->name); + vkd3d_free(buffer); + } +} + +int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; + struct hlsl_ir_function_decl *entry_func; + const struct hlsl_profile_info *profile; + const char *entry_point; + struct hlsl_ctx ctx; + int ret; + + if (!(hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO))) + { + ERR("No HLSL source info given.\n"); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + entry_point = hlsl_source_info->entry_point ? 
hlsl_source_info->entry_point : "main"; + + if (!(profile = get_target_info(hlsl_source_info->profile))) + { + FIXME("Unknown compilation target %s.\n", debugstr_a(hlsl_source_info->profile)); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + vkd3d_shader_dump_shader(compile_info->source_type, profile->type, &compile_info->source); + + if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_BYTECODE && profile->major_version > 3) + { + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The '%s' target profile is incompatible with the 'd3dbc' target type.", profile->name); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + else if (compile_info->target_type == VKD3D_SHADER_TARGET_DXBC_TPF && profile->major_version < 4) + { + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The '%s' target profile is incompatible with the 'dxbc-tpf' target type.", profile->name); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if (!hlsl_ctx_init(&ctx, compile_info->source_name, profile, message_context)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if ((ret = hlsl_lexer_compile(&ctx, hlsl)) == 2) + { + hlsl_ctx_cleanup(&ctx); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (ctx.result) + { + hlsl_ctx_cleanup(&ctx); + return ctx.result; + } + + /* If parsing failed without an error condition being recorded, we + * plausibly hit some unimplemented feature. 
*/ + if (ret) + { + hlsl_ctx_cleanup(&ctx); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + if (!(entry_func = hlsl_get_func_decl(&ctx, entry_point))) + { + const struct vkd3d_shader_location loc = {.source_name = compile_info->source_name}; + + hlsl_error(&ctx, &loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Entry point \"%s\" is not defined.", entry_point); + hlsl_ctx_cleanup(&ctx); + return VKD3D_ERROR_INVALID_SHADER; + } + + ret = hlsl_emit_bytecode(&ctx, entry_func, compile_info->target_type, out); + + hlsl_ctx_cleanup(&ctx); + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h new file mode 100644 index 00000000000..7fd1eb159c3 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -0,0 +1,814 @@ +/* + * Copyright 2012 Matteo Bruni for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SHADER_HLSL_H +#define __VKD3D_SHADER_HLSL_H + +#include "vkd3d_shader_private.h" +#include "wine/rbtree.h" +#include "d3dcommon.h" +#include "d3dx9shader.h" +#include "sm4.h" + +/* The general IR structure is inspired by Mesa GLSL hir, even though the code + * ends up being quite different in practice. 
Anyway, here comes the relevant + * licensing information. + * + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
/* Swizzle components are packed two bits each, x in the low bits. */
#define HLSL_SWIZZLE_X (0u)
#define HLSL_SWIZZLE_Y (1u)
#define HLSL_SWIZZLE_Z (2u)
#define HLSL_SWIZZLE_W (3u)

#define HLSL_SWIZZLE(x, y, z, w) \
        (((HLSL_SWIZZLE_ ## x) << 0) \
        | ((HLSL_SWIZZLE_ ## y) << 2) \
        | ((HLSL_SWIZZLE_ ## z) << 4) \
        | ((HLSL_SWIZZLE_ ## w) << 6))

enum hlsl_type_class
{
    HLSL_CLASS_SCALAR,
    HLSL_CLASS_VECTOR,
    HLSL_CLASS_MATRIX,
    HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX,
    HLSL_CLASS_STRUCT,
    HLSL_CLASS_ARRAY,
    HLSL_CLASS_OBJECT,
};

enum hlsl_base_type
{
    HLSL_TYPE_FLOAT,
    HLSL_TYPE_HALF,
    HLSL_TYPE_DOUBLE,
    HLSL_TYPE_INT,
    HLSL_TYPE_UINT,
    HLSL_TYPE_BOOL,
    HLSL_TYPE_LAST_SCALAR = HLSL_TYPE_BOOL,
    HLSL_TYPE_SAMPLER,
    HLSL_TYPE_TEXTURE,
    HLSL_TYPE_PIXELSHADER,
    HLSL_TYPE_VERTEXSHADER,
    HLSL_TYPE_STRING,
    HLSL_TYPE_VOID,
};

enum hlsl_sampler_dim
{
    HLSL_SAMPLER_DIM_GENERIC,
    HLSL_SAMPLER_DIM_1D,
    HLSL_SAMPLER_DIM_2D,
    HLSL_SAMPLER_DIM_3D,
    HLSL_SAMPLER_DIM_CUBE,
    HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE,
    HLSL_SAMPLER_DIM_1DARRAY,
    HLSL_SAMPLER_DIM_2DARRAY,
    HLSL_SAMPLER_DIM_2DMS,
    HLSL_SAMPLER_DIM_2DMSARRAY,
    HLSL_SAMPLER_DIM_CUBEARRAY,
    HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_CUBEARRAY,
};

enum hlsl_matrix_majority
{
    HLSL_COLUMN_MAJOR,
    HLSL_ROW_MAJOR
};

/* An HLSL data type, linked into hlsl_ctx.types (entry) and into the
 * declaring scope's type tree (scope_entry). */
struct hlsl_type
{
    struct list entry;
    struct rb_entry scope_entry;
    enum hlsl_type_class type;
    enum hlsl_base_type base_type;
    enum hlsl_sampler_dim sampler_dim;
    const char *name;
    unsigned int modifiers;
    /* For matrices: dimx columns, dimy rows (cf. the builtin_types comment
     * below). Vectors use dimx; scalars are 1x1. */
    unsigned int dimx;
    unsigned int dimy;
    union
    {
        struct list *elements;          /* HLSL_CLASS_STRUCT: hlsl_struct_field list */
        struct
        {
            struct hlsl_type *type;
            unsigned int elements_count;
        } array;                        /* HLSL_CLASS_ARRAY */
        struct hlsl_type *resource_format; /* HLSL_TYPE_TEXTURE */
    } e;

    unsigned int reg_size;
    size_t bytecode_offset;
};

struct hlsl_semantic
{
    const char *name;
    uint32_t index;
};

struct hlsl_struct_field
{
    struct list entry;
    struct vkd3d_shader_location loc;
    struct hlsl_type *type;
    const char *name;
    struct hlsl_semantic semantic;
    unsigned int reg_offset;

    size_t name_bytecode_offset;
};

/* An allocated shader register (or register range). */
struct hlsl_reg
{
    uint32_t id;
    unsigned int writemask;
    bool allocated;
};

enum hlsl_ir_node_type
{
    HLSL_IR_CONSTANT,
    HLSL_IR_EXPR,
    HLSL_IR_IF,
    HLSL_IR_LOAD,
    HLSL_IR_LOOP,
    HLSL_IR_JUMP,
    HLSL_IR_RESOURCE_LOAD,
    HLSL_IR_STORE,
    HLSL_IR_SWIZZLE,
};

/* Common header embedded in every concrete IR node type. */
struct hlsl_ir_node
{
    struct list entry;
    enum hlsl_ir_node_type type;
    struct hlsl_type *data_type;

    /* List of hlsl_src structures referencing this node. */
    struct list uses;

    struct vkd3d_shader_location loc;

    /* Liveness ranges. "index" is the index of this instruction. Since this is
     * essentially an SSA value, the earliest live point is the index. This is
     * true even for loops, since currently we can't have a reference to a
     * value generated in an earlier iteration of the loop. */
    unsigned int index, last_read;
    struct hlsl_reg reg;
};

struct hlsl_block
{
    struct list instrs;
};

/* A use of an IR node; linked into the node's "uses" list. */
struct hlsl_src
{
    struct hlsl_ir_node *node;
    struct list entry;
};

#define HLSL_STORAGE_EXTERN          0x00000001
#define HLSL_STORAGE_NOINTERPOLATION 0x00000002
#define HLSL_MODIFIER_PRECISE        0x00000004
#define HLSL_STORAGE_SHARED          0x00000008
#define HLSL_STORAGE_GROUPSHARED     0x00000010
#define HLSL_STORAGE_STATIC          0x00000020
#define HLSL_STORAGE_UNIFORM         0x00000040
#define HLSL_STORAGE_VOLATILE        0x00000080
#define HLSL_MODIFIER_CONST          0x00000100
#define HLSL_MODIFIER_ROW_MAJOR      0x00000200
#define HLSL_MODIFIER_COLUMN_MAJOR   0x00000400
#define HLSL_STORAGE_IN              0x00000800
#define HLSL_STORAGE_OUT             0x00001000

/* Modifiers that apply to the type itself, as opposed to the variable. */
#define HLSL_TYPE_MODIFIERS_MASK     (HLSL_MODIFIER_PRECISE | HLSL_STORAGE_VOLATILE | \
                                      HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \
                                      HLSL_MODIFIER_COLUMN_MAJOR)

#define HLSL_MODIFIERS_MAJORITY_MASK (HLSL_MODIFIER_ROW_MAJOR | HLSL_MODIFIER_COLUMN_MAJOR)

/* An explicit register() reservation from the source. */
struct hlsl_reg_reservation
{
    char type;
    unsigned int index;
};

struct hlsl_ir_var
{
    struct hlsl_type *data_type;
    struct vkd3d_shader_location loc;
    const char *name;
    struct hlsl_semantic semantic;
    struct hlsl_buffer *buffer;
    unsigned int modifiers;
    struct hlsl_reg_reservation reg_reservation;
    struct list scope_entry, param_entry, extern_entry;

    unsigned int first_write, last_read;
    unsigned int buffer_offset;
    struct hlsl_reg reg;

    uint32_t is_input_semantic : 1;
    uint32_t is_output_semantic : 1;
    uint32_t is_uniform : 1;
    uint32_t is_param : 1;
};

/* A function name with its set of overloads, keyed in ctx->functions. */
struct hlsl_ir_function
{
    struct rb_entry entry;
    const char *name;
    struct rb_tree overloads;
    bool intrinsic;
};

struct hlsl_ir_function_decl
{
    struct hlsl_type *return_type;
    struct hlsl_ir_var *return_var;
    struct vkd3d_shader_location loc;
    struct rb_entry entry;
    struct hlsl_ir_function *func;
    struct list *parameters;
    struct hlsl_block body;
    bool has_body;
};

struct hlsl_ir_if
{
    struct hlsl_ir_node node;
    struct hlsl_src condition;
    struct hlsl_block then_instrs;
    struct hlsl_block else_instrs;
};

struct hlsl_ir_loop
{
    struct hlsl_ir_node node;
    /* loop condition is stored in the body (as "if (!condition) break;") */
    struct hlsl_block body;
    unsigned int next_index; /* liveness index of the end of the loop */
};

enum hlsl_ir_expr_op
{
    HLSL_OP1_ABS,
    HLSL_OP1_BIT_NOT,
    HLSL_OP1_CAST,
    HLSL_OP1_COS,
    HLSL_OP1_COS_REDUCED,    /* Reduced range [-pi, pi] */
    HLSL_OP1_DSX,
    HLSL_OP1_DSY,
    HLSL_OP1_EXP2,
    HLSL_OP1_FLOOR,
    HLSL_OP1_FRACT,
    HLSL_OP1_LOG2,
    HLSL_OP1_LOGIC_NOT,
    HLSL_OP1_NEG,
    HLSL_OP1_NRM,
    HLSL_OP1_RCP,
    HLSL_OP1_ROUND,
    HLSL_OP1_RSQ,
    HLSL_OP1_SAT,
    HLSL_OP1_SIGN,
    HLSL_OP1_SIN,
    HLSL_OP1_SIN_REDUCED,    /* Reduced range [-pi, pi] */
    HLSL_OP1_SQRT,

    HLSL_OP2_ADD,
    HLSL_OP2_BIT_AND,
    HLSL_OP2_BIT_OR,
    HLSL_OP2_BIT_XOR,
    HLSL_OP2_CRS,
    HLSL_OP2_DIV,
    HLSL_OP2_DOT,
    HLSL_OP2_EQUAL,
    HLSL_OP2_GEQUAL,
    HLSL_OP2_LESS,
    HLSL_OP2_LOGIC_AND,
    HLSL_OP2_LOGIC_OR,
    HLSL_OP2_LSHIFT,
    HLSL_OP2_MAX,
    HLSL_OP2_MIN,
    HLSL_OP2_MOD,
    HLSL_OP2_MUL,
    HLSL_OP2_NEQUAL,
    HLSL_OP2_RSHIFT,

    HLSL_OP3_LERP,
};

#define HLSL_MAX_OPERANDS 3

struct hlsl_ir_expr
{
    struct hlsl_ir_node node;
    enum hlsl_ir_expr_op op;
    struct hlsl_src operands[HLSL_MAX_OPERANDS];
};

enum hlsl_ir_jump_type
{
    HLSL_IR_JUMP_BREAK,
    HLSL_IR_JUMP_CONTINUE,
    HLSL_IR_JUMP_DISCARD,
    HLSL_IR_JUMP_RETURN,
};

struct hlsl_ir_jump
{
    struct hlsl_ir_node node;
    enum hlsl_ir_jump_type type;
};

struct hlsl_ir_swizzle
{
    struct hlsl_ir_node node;
    struct hlsl_src val;
    DWORD swizzle;
};

/* A variable reference: the variable plus a register offset into it. */
struct hlsl_deref
{
    struct hlsl_ir_var *var;
    struct hlsl_src offset;
};

struct hlsl_ir_load
{
    struct hlsl_ir_node node;
    struct hlsl_deref src;
};

enum hlsl_resource_load_type
{
    HLSL_RESOURCE_LOAD,
    HLSL_RESOURCE_SAMPLE,
    HLSL_RESOURCE_GATHER_RED,
    HLSL_RESOURCE_GATHER_GREEN,
    HLSL_RESOURCE_GATHER_BLUE,
    HLSL_RESOURCE_GATHER_ALPHA,
};

struct hlsl_ir_resource_load
{
    struct hlsl_ir_node node;
    enum hlsl_resource_load_type load_type;
    struct hlsl_deref resource, sampler;
    struct hlsl_src coords;
    struct hlsl_src texel_offset;
};

struct hlsl_ir_store
{
    struct hlsl_ir_node node;
    struct hlsl_deref lhs;
    struct hlsl_src rhs;
    unsigned char writemask;
};

struct hlsl_ir_constant
{
    struct hlsl_ir_node node;
    union hlsl_constant_value
    {
        uint32_t u;
        int32_t i;
        float f;
        double d;
        bool b;
    } value[4];
    struct hlsl_reg reg;
};

struct hlsl_scope
{
    struct list entry;
    struct list vars;
    struct rb_tree types;
    struct hlsl_scope *upper;   /* enclosing scope, NULL for the global scope */
};

struct hlsl_profile_info
{
    const char *name;
    enum vkd3d_shader_type type;
    unsigned int major_version;
    unsigned int minor_version;
    unsigned int major_level;
    unsigned int minor_level;
    bool software;
};

struct hlsl_vec4
{
    float f[4];
};

enum hlsl_buffer_type
{
    HLSL_BUFFER_CONSTANT,
    HLSL_BUFFER_TEXTURE,
};

struct hlsl_buffer
{
    struct vkd3d_shader_location loc;
    enum hlsl_buffer_type type;
    const char *name;
    struct hlsl_reg_reservation reservation;
    struct list entry;

    unsigned size, used_size;
    struct hlsl_reg reg;
};

/* State for one compilation; initialised by hlsl_ctx_init() and torn down by
 * hlsl_ctx_cleanup() in hlsl.c. */
struct hlsl_ctx
{
    const struct hlsl_profile_info *profile;

    const char **source_files;
    unsigned int source_files_count;
    struct vkd3d_shader_location location;
    struct vkd3d_shader_message_context *message_context;
    struct vkd3d_string_buffer_cache string_buffers;
    /* Sticky error status; set (e.g. to VKD3D_ERROR_OUT_OF_MEMORY by the
     * allocation helpers) and checked after parsing. */
    int result;

    void *scanner;  /* reentrant flex scanner state */

    struct hlsl_scope *cur_scope;
    struct hlsl_scope *globals;
    struct list scopes;
    struct list extern_vars;

    struct list buffers;
    struct hlsl_buffer *cur_buffer, *globals_buffer, *params_buffer;
    struct list types;
    struct rb_tree functions;
    const struct hlsl_ir_function_decl *cur_function;

    enum hlsl_matrix_majority matrix_majority;

    /* Cached predefined numeric/sampler types, filled at context init. */
    struct
    {
        struct hlsl_type *scalar[HLSL_TYPE_LAST_SCALAR + 1];
        struct hlsl_type *vector[HLSL_TYPE_LAST_SCALAR + 1][4];
        /* matrix[float][2][4] is a float4x2, i.e. dimx = 2, dimy = 4 */
        struct hlsl_type *matrix[HLSL_TYPE_LAST_SCALAR + 1][4][4];
        struct hlsl_type *sampler[HLSL_SAMPLER_DIM_LAST_SAMPLER + 1];
        struct hlsl_type *Void;
    } builtin_types;

    struct list static_initializers;

    struct hlsl_constant_defs
    {
        struct hlsl_vec4 *values;
        size_t count, size;
    } constant_defs;
    uint32_t temp_count;

    uint32_t in_state_block : 1;
};

enum hlsl_error_level
{
    HLSL_LEVEL_ERROR = 0,
    HLSL_LEVEL_WARNING,
    HLSL_LEVEL_NOTE,
};

/* Checked downcasts from the generic IR node header to concrete node types. */
static inline struct hlsl_ir_constant *hlsl_ir_constant(const struct hlsl_ir_node *node)
{
    assert(node->type == HLSL_IR_CONSTANT);
    return CONTAINING_RECORD(node, struct hlsl_ir_constant, node);
}

static inline struct hlsl_ir_expr *hlsl_ir_expr(const struct hlsl_ir_node *node)
{
    assert(node->type == HLSL_IR_EXPR);
    return CONTAINING_RECORD(node, struct hlsl_ir_expr, node);
}

static inline struct hlsl_ir_if *hlsl_ir_if(const struct hlsl_ir_node *node)
{
    assert(node->type == HLSL_IR_IF);
    return CONTAINING_RECORD(node, struct hlsl_ir_if, node);
}
/* Checked downcasts from the generic IR node header to concrete node types. */
static inline struct hlsl_ir_jump *hlsl_ir_jump(const struct hlsl_ir_node *node)
{
    assert(node->type == HLSL_IR_JUMP);
    return CONTAINING_RECORD(node, struct hlsl_ir_jump, node);
}

static inline struct hlsl_ir_load *hlsl_ir_load(const struct hlsl_ir_node *node)
{
    assert(node->type == HLSL_IR_LOAD);
    return CONTAINING_RECORD(node, struct hlsl_ir_load, node);
}

static inline struct hlsl_ir_loop *hlsl_ir_loop(const struct hlsl_ir_node *node)
{
    assert(node->type == HLSL_IR_LOOP);
    return CONTAINING_RECORD(node, struct hlsl_ir_loop, node);
}

static inline struct hlsl_ir_resource_load *hlsl_ir_resource_load(const struct hlsl_ir_node *node)
{
    assert(node->type == HLSL_IR_RESOURCE_LOAD);
    return CONTAINING_RECORD(node, struct hlsl_ir_resource_load, node);
}

static inline struct hlsl_ir_store *hlsl_ir_store(const struct hlsl_ir_node *node)
{
    assert(node->type == HLSL_IR_STORE);
    return CONTAINING_RECORD(node, struct hlsl_ir_store, node);
}

static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node *node)
{
    assert(node->type == HLSL_IR_SWIZZLE);
    return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node);
}

/* Initialise the common node header; the concrete payload is filled in by
 * the caller. */
static inline void init_node(struct hlsl_ir_node *node, enum hlsl_ir_node_type type,
        struct hlsl_type *data_type, struct vkd3d_shader_location loc)
{
    memset(node, 0, sizeof(*node));
    node->type = type;
    node->data_type = data_type;
    node->loc = loc;
    list_init(&node->uses);
}

/* Point "src" at "node", registering the use on the node's use list.
 * "node" may be NULL. */
static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node)
{
    src->node = node;
    if (node)
        list_add_tail(&node->uses, &src->entry);
}

static inline void hlsl_src_remove(struct hlsl_src *src)
{
    if (src->node)
        list_remove(&src->entry);
    src->node = NULL;
}

/* Allocation helpers: on failure they record VKD3D_ERROR_OUT_OF_MEMORY in
 * ctx->result, so callers can check for NULL and continue parsing; the
 * sticky error is reported once compilation finishes. */
static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size)
{
    void *ptr = vkd3d_calloc(1, size);

    if (!ptr)
        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
    return ptr;
}

static inline void *hlsl_realloc(struct hlsl_ctx *ctx, void *ptr, size_t size)
{
    void *ret = vkd3d_realloc(ptr, size);

    if (!ret)
        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
    return ret;
}

static inline char *hlsl_strdup(struct hlsl_ctx *ctx, const char *string)
{
    char *ptr = vkd3d_strdup(string);

    if (!ptr)
        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
    return ptr;
}

static inline bool hlsl_array_reserve(struct hlsl_ctx *ctx, void **elements,
        size_t *capacity, size_t element_count, size_t element_size)
{
    bool ret = vkd3d_array_reserve(elements, capacity, element_count, element_size);

    if (!ret)
        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
    return ret;
}

static inline struct vkd3d_string_buffer *hlsl_get_string_buffer(struct hlsl_ctx *ctx)
{
    struct vkd3d_string_buffer *ret = vkd3d_string_buffer_get(&ctx->string_buffers);

    if (!ret)
        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
    return ret;
}

static inline void hlsl_release_string_buffer(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer)
{
    vkd3d_string_buffer_release(&ctx->string_buffers, buffer);
}

/* Look up predefined numeric types cached in ctx->builtin_types. */
static inline struct hlsl_type *hlsl_get_scalar_type(const struct hlsl_ctx *ctx, enum hlsl_base_type base_type)
{
    return ctx->builtin_types.scalar[base_type];
}

static inline struct hlsl_type *hlsl_get_vector_type(const struct hlsl_ctx *ctx, enum hlsl_base_type base_type,
        unsigned int dimx)
{
    return ctx->builtin_types.vector[base_type][dimx - 1];
}

static inline struct hlsl_type *hlsl_get_matrix_type(const struct hlsl_ctx *ctx, enum hlsl_base_type base_type,
        unsigned int dimx, unsigned int dimy)
{
    return ctx->builtin_types.matrix[base_type][dimx - 1][dimy - 1];
}

static inline struct hlsl_type *hlsl_get_numeric_type(const struct hlsl_ctx *ctx, enum hlsl_type_class type,
        enum hlsl_base_type base_type, unsigned int dimx, unsigned int dimy)
{
    if (type == HLSL_CLASS_SCALAR)
        return hlsl_get_scalar_type(ctx, base_type);
    else if (type == HLSL_CLASS_VECTOR)
        return hlsl_get_vector_type(ctx, base_type, dimx);
    else
        return hlsl_get_matrix_type(ctx, base_type, dimx, dimy);
}

/* Number of coordinate components needed to address a resource of the given
 * dimension (e.g. 3 for a cube: a direction vector). */
static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim)
{
    switch (dim)
    {
        case HLSL_SAMPLER_DIM_1D:
            return 1;
        case HLSL_SAMPLER_DIM_1DARRAY:
        case HLSL_SAMPLER_DIM_2D:
        case HLSL_SAMPLER_DIM_2DMS:
            return 2;
        case HLSL_SAMPLER_DIM_2DARRAY:
        case HLSL_SAMPLER_DIM_2DMSARRAY:
        case HLSL_SAMPLER_DIM_3D:
        case HLSL_SAMPLER_DIM_CUBE:
            return 3;
        case HLSL_SAMPLER_DIM_CUBEARRAY:
            return 4;
        default:
            assert(0);
            return 0;
    }
}

/* Debug string formatters (valid until the next call / debug buffer reuse). */
const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op);
const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type);
const char *debug_hlsl_writemask(unsigned int writemask);
const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count);

struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type);
struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers);
const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);

void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl, bool intrinsic);
bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var);

void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func);

int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
        enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);

void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new);

void hlsl_free_instr(struct hlsl_ir_node *node);
void hlsl_free_instr_list(struct list *list);
void hlsl_free_type(struct hlsl_type *type);
void hlsl_free_var(struct hlsl_ir_var *decl);

bool hlsl_get_function(struct hlsl_ctx *ctx, const char *name);
struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const char *name);
struct hlsl_type *hlsl_get_type(struct hlsl_scope *scope, const char *name, bool recursive);
struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name);

/* Constructors for types, IR nodes and variables; implemented in hlsl.c.
 * On allocation failure they return NULL and record the error in ctx. */
struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size);
struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1,
        struct hlsl_ir_node *arg2);
struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name,
        const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc);
struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type,
        const struct vkd3d_shader_location *loc);
struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node);
struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type,
        struct list *parameters, const struct hlsl_semantic *semantic, struct vkd3d_shader_location loc);
struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc);
struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n,
        const struct vkd3d_shader_location loc);
struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc);
struct hlsl_ir_load *hlsl_new_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct hlsl_ir_node *offset,
        struct hlsl_type *type, struct vkd3d_shader_location loc);
struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc);
struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, struct hlsl_type *data_type,
        enum hlsl_resource_load_type type, struct hlsl_ir_var *resource, struct hlsl_ir_node *resource_offset,
        struct hlsl_ir_var *sampler, struct hlsl_ir_node *sampler_offset, struct hlsl_ir_node *coords,
        struct hlsl_ir_node *texel_offset, const struct vkd3d_shader_location *loc);
struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs);
struct hlsl_ir_store *hlsl_new_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct hlsl_ir_node *offset,
        struct hlsl_ir_node *rhs, unsigned int writemask, struct vkd3d_shader_location loc);
struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct list *fields);
struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components,
        struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc);
struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type,
        const struct vkd3d_shader_location loc);
struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format);
struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n,
        const struct vkd3d_shader_location loc);
struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg,
        struct vkd3d_shader_location loc);
struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type,
        const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers,
        const struct hlsl_reg_reservation *reg_reservation);
struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
        const struct vkd3d_shader_location loc);

/* Diagnostics; hlsl_error() also marks the compilation as failed. */
void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
        enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5);
void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
        const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4);
void hlsl_warning(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
        enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5);
void hlsl_note(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
        enum vkd3d_shader_log_level level, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5);

void hlsl_push_scope(struct hlsl_ctx *ctx);
void hlsl_pop_scope(struct hlsl_ctx *ctx);

bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type);

struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old,
        unsigned int default_majority, unsigned int modifiers);
unsigned int hlsl_type_component_count(struct hlsl_type *type);
unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset);
bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);

/* Swizzle/writemask arithmetic helpers. */
unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim);
unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second);
unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask);
unsigned int hlsl_swizzle_from_writemask(unsigned int writemask);

bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset);
unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
        const struct hlsl_type *type);

bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);

/* Shader model 1-3 (d3dbc) backend, implemented in hlsl_sm1.c. */
bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
        bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg);
bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx);
int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out);

/* Shader model 4+ (dxbc-tpf) backend, implemented in hlsl_sm4.c. */
bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx,
        const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage);
bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
        bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx);
int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out);

int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl);

#endif
/* HLSL tokenizer. Fixes relative to the imported text:
 * - The float-constant suffix classes were written "[h|H|f|F]?", which also
 *   matches a literal '|' inside the bracket expression, so e.g. "1.0||x"
 *   lexed "1.0|" as a float constant and corrupted the following operator.
 *   They now read "[fFhH]?".
 * - The <pp>/<pp_pragma>/<pp_line>/<pp_ignore> start-condition prefixes
 *   (declared with %x below) are restored on the preprocessor rules; without
 *   them those rules duplicate earlier INITIAL rules and the states are
 *   never entered. */
%{
#define YY_NO_UNISTD_H
#include "hlsl.h"
#include "hlsl.tab.h"

#define YYSTYPE HLSL_YYSTYPE
#define YYLTYPE HLSL_YYLTYPE

static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc);

#define YY_USER_ACTION update_location(yyget_extra(yyscanner), yyget_lloc(yyscanner));

%}

%option bison-bridge
%option bison-locations
%option extra-type="struct hlsl_ctx *"
%option never-interactive
%option noinput
%option nounput
%option noyywrap
%option prefix="hlsl_yy"
%option reentrant

%x pp pp_line pp_pragma pp_ignore

RESERVED1 auto|case|catch|char|class|const_cast|default|delete|dynamic_cast|enum
RESERVED2 explicit|friend|goto|long|mutable|new|operator|private|protected|public
RESERVED3 reinterpret_cast|short|signed|sizeof|static_cast|template|this|throw|try
RESERVED4 typename|union|unsigned|using|virtual

WS [ \t]
NEWLINE (\n)|(\r\n)
DOUBLESLASHCOMMENT "//"[^\n]*
STRING \"[^\"]*\"
IDENTIFIER [A-Za-z_][A-Za-z0-9_]*

ANY (.)

%%
{RESERVED1} |
{RESERVED2} |
{RESERVED3} |
{RESERVED4}             {
                            struct hlsl_ctx *ctx = yyget_extra(yyscanner);

                            hlsl_error(ctx, yylloc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
                                    "Reserved keyword \"%s\" used.", yytext);
                        }

BlendState              {return KW_BLENDSTATE;          }
break                   {return KW_BREAK;               }
Buffer                  {return KW_BUFFER;              }
cbuffer                 {return KW_CBUFFER;             }
compile                 {return KW_COMPILE;             }
const                   {return KW_CONST;               }
continue                {return KW_CONTINUE;            }
DepthStencilState       {return KW_DEPTHSTENCILSTATE;   }
DepthStencilView        {return KW_DEPTHSTENCILVIEW;    }
discard                 {return KW_DISCARD;             }
do                      {return KW_DO;                  }
double                  {return KW_DOUBLE;              }
else                    {return KW_ELSE;                }
extern                  {return KW_EXTERN;              }
false                   {return KW_FALSE;               }
for                     {return KW_FOR;                 }
GeometryShader          {return KW_GEOMETRYSHADER;      }
groupshared             {return KW_GROUPSHARED;         }
if                      {return KW_IF;                  }
in                      {return KW_IN;                  }
inline                  {return KW_INLINE;              }
inout                   {return KW_INOUT;               }
matrix                  {return KW_MATRIX;              }
namespace               {return KW_NAMESPACE;           }
nointerpolation         {return KW_NOINTERPOLATION;     }
out                     {return KW_OUT;                 }
pass                    {return KW_PASS;                }
PixelShader             {return KW_PIXELSHADER;         }
precise                 {return KW_PRECISE;             }
RasterizerState         {return KW_RASTERIZERSTATE;     }
RenderTargetView        {return KW_RENDERTARGETVIEW;    }
return                  {return KW_RETURN;              }
register                {return KW_REGISTER;            }
sampler                 {return KW_SAMPLER;             }
sampler1D               {return KW_SAMPLER1D;           }
sampler2D               {return KW_SAMPLER2D;           }
sampler3D               {return KW_SAMPLER3D;           }
samplerCUBE             {return KW_SAMPLERCUBE;         }
sampler_state           {return KW_SAMPLER_STATE;       }
SamplerComparisonState  {return KW_SAMPLERCOMPARISONSTATE;}
SamplerState            {return KW_SAMPLER;             }
shared                  {return KW_SHARED;              }
stateblock              {return KW_STATEBLOCK;          }
stateblock_state        {return KW_STATEBLOCK_STATE;    }
static                  {return KW_STATIC;              }
string                  {return KW_STRING;              }
struct                  {return KW_STRUCT;              }
switch                  {return KW_SWITCH;              }
tbuffer                 {return KW_TBUFFER;             }
technique               {return KW_TECHNIQUE;           }
technique10             {return KW_TECHNIQUE10;         }
texture                 {return KW_TEXTURE;             }
texture1D               {return KW_TEXTURE1D;           }
Texture1D               {return KW_TEXTURE1D;           }
Texture1DArray          {return KW_TEXTURE1DARRAY;      }
texture2D               {return KW_TEXTURE2D;           }
Texture2D               {return KW_TEXTURE2D;           }
Texture2DArray          {return KW_TEXTURE2DARRAY;      }
Texture2DMS             {return KW_TEXTURE2DMS;         }
Texture2DMSArray        {return KW_TEXTURE2DMSARRAY;    }
texture3D               {return KW_TEXTURE3D;           }
Texture3D               {return KW_TEXTURE3D;           }
textureCUBE             {return KW_TEXTURECUBE;         }
TextureCube             {return KW_TEXTURECUBE;         }
TextureCubeArray        {return KW_TEXTURECUBEARRAY;    }
true                    {return KW_TRUE;                }
typedef                 {return KW_TYPEDEF;             }
uniform                 {return KW_UNIFORM;             }
vector                  {return KW_VECTOR;              }
VertexShader            {return KW_VERTEXSHADER;        }
void                    {return KW_VOID;                }
volatile                {return KW_VOLATILE;            }
while                   {return KW_WHILE;               }

\+\+                    {return OP_INC;                 }
\-\-                    {return OP_DEC;                 }
&&                      {return OP_AND;                 }
\|\|                    {return OP_OR;                  }
==                      {return OP_EQ;                  }
\<\<                    {return OP_LEFTSHIFT;           }
\<\<=                   {return OP_LEFTSHIFTASSIGN;     }
\>\>                    {return OP_RIGHTSHIFT;          }
\>\>=                   {return OP_RIGHTSHIFTASSIGN;    }
\.\.\.                  {return OP_ELLIPSIS;            }
\<=                     {return OP_LE;                  }
\>=                     {return OP_GE;                  }
!=                      {return OP_NE;                  }
\+=                     {return OP_ADDASSIGN;           }
\-=                     {return OP_SUBASSIGN;           }
\*=                     {return OP_MULASSIGN;           }
\/=                     {return OP_DIVASSIGN;           }
%=                      {return OP_MODASSIGN;           }
&=                      {return OP_ANDASSIGN;           }
\|=                     {return OP_ORASSIGN;            }
^=                      {return OP_XORASSIGN;           }
##                      {return OP_UNKNOWN1;            }
#@                      {return OP_UNKNOWN2;            }
::                      {return OP_UNKNOWN3;            }
\-\>                    {return OP_UNKNOWN4;            }

column_major            {return KW_COLUMN_MAJOR;        }
row_major               {return KW_ROW_MAJOR;           }

{IDENTIFIER}            {
                            struct hlsl_ctx *ctx = yyget_extra(yyscanner);

                            yylval->name = hlsl_strdup(ctx, yytext);
                            if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext))
                                return VAR_IDENTIFIER;
                            else if (hlsl_get_type(ctx->cur_scope, yytext, true))
                                return TYPE_IDENTIFIER;
                            else
                                return NEW_IDENTIFIER;
                        }

[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fFhH]? {
                            yylval->floatval = atof(yytext);
                            return C_FLOAT;
                        }
[0-9]+\.([eE][+-]?[0-9]+)?[fFhH]? {
                            yylval->floatval = atof(yytext);
                            return C_FLOAT;
                        }
[0-9]+[eE][+-]?[0-9]+[fFhH]? {
                            yylval->floatval = atof(yytext);
                            return C_FLOAT;
                        }
0x[0-9a-fA-F]+          {
                            sscanf(yytext, "0x%x", &yylval->intval);
                            return C_INTEGER;
                        }
0[0-7]+                 {
                            sscanf(yytext, "0%o", &yylval->intval);
                            return C_INTEGER;
                        }
[0-9]+                  {
                            yylval->intval = (atoi(yytext));
                            return C_INTEGER;
                        }

{DOUBLESLASHCOMMENT}    {}

{WS}+                   {}
{NEWLINE}               {
                            struct hlsl_ctx *ctx = yyget_extra(yyscanner);

                            ++ctx->location.line;
                            ctx->location.column = 1;
                        }

^#                      {
                            BEGIN(pp);
                        }

<pp>pragma{WS}+         {
                            BEGIN(pp_pragma);
                        }
<pp_pragma>pack_matrix{WS}*\({WS}*row_major{WS}*\) {
                            struct hlsl_ctx *ctx = yyget_extra(yyscanner);

                            TRACE("#pragma setting row_major mode.\n");
                            ctx->matrix_majority = HLSL_ROW_MAJOR;
                            BEGIN(pp_ignore);
                        }
<pp_pragma>pack_matrix{WS}*\({WS}*column_major{WS}*\) {
                            struct hlsl_ctx *ctx = yyget_extra(yyscanner);

                            TRACE("#pragma setting column_major mode.\n");
                            ctx->matrix_majority = HLSL_COLUMN_MAJOR;
                            BEGIN(pp_ignore);
                        }
<pp_pragma>{NEWLINE}    {
                            struct hlsl_ctx *ctx = yyget_extra(yyscanner);

                            FIXME("Unsupported preprocessor #pragma directive at line %u.\n", ctx->location.line);
                            BEGIN(INITIAL);
                        }
<pp_pragma>{ANY}        {}
<pp>[0-9]+              {
                            BEGIN(pp_line);
                            yylval->intval = (atoi(yytext));
                            return PRE_LINE;
                        }
<pp_line>{STRING}       {
                            struct hlsl_ctx *ctx = yyget_extra(yyscanner);
                            char *string = hlsl_strdup(ctx, yytext + 1);

                            BEGIN(pp_ignore);
                            string[strlen(string) - 1] = 0;
                            yylval->name = string;
                            return STRING;
                        }
<pp_line>{WS}+          {}
<pp_line>{NEWLINE}      {
                            FIXME("Malformed preprocessor line directive?\n");
                            BEGIN(INITIAL);
                        }
<pp_ignore>{NEWLINE}    {
                            BEGIN(INITIAL);
                        }
<pp_ignore>{ANY}        {}
<pp>{NEWLINE}           {
                            FIXME("Unexpected preprocessor directive.\n");
                            BEGIN(INITIAL);
                        }
<pp>{ANY}               {}

{ANY}                   {
                            return yytext[0];
                        }

%%

/* Record the token's start position and advance the tracked column by the
 * token length (YY_USER_ACTION runs this before every rule action). */
static void update_location(struct hlsl_ctx *ctx, YYLTYPE *lloc)
{
    *lloc = ctx->location;
    ctx->location.column += yyget_leng(ctx->scanner);
}
vkd3d_shader_code *hlsl) +{ + YY_BUFFER_STATE buffer; + int ret; + + yylex_init_extra(ctx, &ctx->scanner); + buffer = yy_scan_bytes(hlsl->code, hlsl->size, ctx->scanner); + yy_switch_to_buffer(buffer, ctx->scanner); + + ret = hlsl_yyparse(ctx->scanner, ctx); + + yy_delete_buffer(buffer, ctx->scanner); + yylex_destroy(ctx->scanner); + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y new file mode 100644 index 00000000000..92dcb3f2afd --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -0,0 +1,3857 @@ +/* + * HLSL parser + * + * Copyright 2008 Stefan Dösinger + * Copyright 2012 Matteo Bruni for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +%code requires +{ + +#include "hlsl.h" +#include + +#define HLSL_YYLTYPE struct vkd3d_shader_location + +struct parse_parameter +{ + struct hlsl_type *type; + const char *name; + struct hlsl_semantic semantic; + struct hlsl_reg_reservation reg_reservation; + unsigned int modifiers; +}; + +struct parse_colon_attribute +{ + struct hlsl_semantic semantic; + struct hlsl_reg_reservation reg_reservation; +}; + +struct parse_initializer +{ + struct hlsl_ir_node **args; + unsigned int args_count; + struct list *instrs; +}; + +struct parse_array_sizes +{ + uint32_t *sizes; /* innermost first */ + unsigned int count; +}; + +struct parse_variable_def +{ + struct list entry; + struct vkd3d_shader_location loc; + + char *name; + struct parse_array_sizes arrays; + struct hlsl_semantic semantic; + struct hlsl_reg_reservation reg_reservation; + struct parse_initializer initializer; +}; + +struct parse_function +{ + char *name; + struct hlsl_ir_function_decl *decl; +}; + +struct parse_if_body +{ + struct list *then_instrs; + struct list *else_instrs; +}; + +enum parse_assign_op +{ + ASSIGN_OP_ASSIGN, + ASSIGN_OP_ADD, + ASSIGN_OP_SUB, + ASSIGN_OP_MUL, + ASSIGN_OP_DIV, + ASSIGN_OP_MOD, + ASSIGN_OP_LSHIFT, + ASSIGN_OP_RSHIFT, + ASSIGN_OP_AND, + ASSIGN_OP_OR, + ASSIGN_OP_XOR, +}; + +} + +%code provides +{ + +int yylex(HLSL_YYSTYPE *yylval_param, HLSL_YYLTYPE *yylloc_param, void *yyscanner); + +} + +%code +{ + +#define YYLLOC_DEFAULT(cur, rhs, n) (cur) = YYRHSLOC(rhs, !!n) + +static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const char *s) +{ + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "%s", s); +} + +static struct hlsl_ir_node *node_from_list(struct list *list) +{ + return LIST_ENTRY(list_tail(list), struct 
hlsl_ir_node, entry); +} + +static struct list *make_empty_list(struct hlsl_ctx *ctx) +{ + struct list *list; + + if ((list = hlsl_alloc(ctx, sizeof(*list)))) + list_init(list); + return list; +} + +static void destroy_instr_list(struct list *list) +{ + hlsl_free_instr_list(list); + vkd3d_free(list); +} + +static void check_invalid_matrix_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, struct vkd3d_shader_location loc) +{ + if (modifiers & HLSL_MODIFIERS_MAJORITY_MASK) + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "'row_major' and 'column_major' modifiers are only allowed for matrices."); +} + +static bool convertible_data_type(struct hlsl_type *type) +{ + return type->type != HLSL_CLASS_OBJECT; +} + +static bool compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) +{ + if (!convertible_data_type(t1) || !convertible_data_type(t2)) + return false; + + if (t1->type <= HLSL_CLASS_LAST_NUMERIC) + { + /* Scalar vars can be cast to pretty much everything */ + if (t1->dimx == 1 && t1->dimy == 1) + return true; + + if (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_VECTOR) + return t1->dimx >= t2->dimx; + } + + /* The other way around is true too i.e. whatever to scalar */ + if (t2->type <= HLSL_CLASS_LAST_NUMERIC && t2->dimx == 1 && t2->dimy == 1) + return true; + + if (t1->type == HLSL_CLASS_ARRAY) + { + if (hlsl_types_are_equal(t1->e.array.type, t2)) + /* e.g. 
float4[3] to float4 is allowed */ + return true; + + if (t2->type == HLSL_CLASS_ARRAY || t2->type == HLSL_CLASS_STRUCT) + return hlsl_type_component_count(t1) >= hlsl_type_component_count(t2); + else + return hlsl_type_component_count(t1) == hlsl_type_component_count(t2); + } + + if (t1->type == HLSL_CLASS_STRUCT) + return hlsl_type_component_count(t1) >= hlsl_type_component_count(t2); + + if (t2->type == HLSL_CLASS_ARRAY || t2->type == HLSL_CLASS_STRUCT) + return hlsl_type_component_count(t1) == hlsl_type_component_count(t2); + + if (t1->type == HLSL_CLASS_MATRIX || t2->type == HLSL_CLASS_MATRIX) + { + if (t1->type == HLSL_CLASS_MATRIX && t2->type == HLSL_CLASS_MATRIX && t1->dimx >= t2->dimx && t1->dimy >= t2->dimy) + return true; + + /* Matrix-vector conversion is apparently allowed if they have the same components count */ + if ((t1->type == HLSL_CLASS_VECTOR || t2->type == HLSL_CLASS_VECTOR) + && hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) + return true; + return false; + } + + if (hlsl_type_component_count(t1) >= hlsl_type_component_count(t2)) + return true; + return false; +} + +static bool implicit_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) +{ + if (!convertible_data_type(t1) || !convertible_data_type(t2)) + return false; + + if (t1->type <= HLSL_CLASS_LAST_NUMERIC) + { + /* Scalar vars can be converted to any other numeric data type */ + if (t1->dimx == 1 && t1->dimy == 1 && t2->type <= HLSL_CLASS_LAST_NUMERIC) + return true; + /* The other way around is true too */ + if (t2->dimx == 1 && t2->dimy == 1 && t2->type <= HLSL_CLASS_LAST_NUMERIC) + return true; + } + + if (t1->type == HLSL_CLASS_ARRAY && t2->type == HLSL_CLASS_ARRAY) + { + return hlsl_type_component_count(t1) == hlsl_type_component_count(t2); + } + + if ((t1->type == HLSL_CLASS_ARRAY && t2->type <= HLSL_CLASS_LAST_NUMERIC) + || (t1->type <= HLSL_CLASS_LAST_NUMERIC && t2->type == HLSL_CLASS_ARRAY)) + { + /* e.g. 
float4[3] to float4 is allowed */ + if (t1->type == HLSL_CLASS_ARRAY && hlsl_types_are_equal(t1->e.array.type, t2)) + return true; + if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) + return true; + return false; + } + + if (t1->type <= HLSL_CLASS_VECTOR && t2->type <= HLSL_CLASS_VECTOR) + { + if (t1->dimx >= t2->dimx) + return true; + return false; + } + + if (t1->type == HLSL_CLASS_MATRIX || t2->type == HLSL_CLASS_MATRIX) + { + if (t1->type == HLSL_CLASS_MATRIX && t2->type == HLSL_CLASS_MATRIX) + return t1->dimx >= t2->dimx && t1->dimy >= t2->dimy; + + /* Matrix-vector conversion is apparently allowed if they have + * the same components count, or if the matrix is 1xN or Nx1 + * and we are reducing the component count */ + if (t1->type == HLSL_CLASS_VECTOR || t2->type == HLSL_CLASS_VECTOR) + { + if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) + return true; + + if ((t1->type == HLSL_CLASS_VECTOR || t1->dimx == 1 || t1->dimy == 1) && + (t2->type == HLSL_CLASS_VECTOR || t2->dimx == 1 || t2->dimy == 1)) + return hlsl_type_component_count(t1) >= hlsl_type_component_count(t2); + } + + return false; + } + + if (t1->type == HLSL_CLASS_STRUCT && t2->type == HLSL_CLASS_STRUCT) + return hlsl_types_are_equal(t1, t2); + + return false; +} + +static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *src_type = node->data_type; + struct hlsl_ir_expr *cast; + + if (hlsl_types_are_equal(src_type, dst_type)) + return node; + + if (!implicit_compatible_data_types(src_type, dst_type)) + { + struct vkd3d_string_buffer *src_string, *dst_string; + + src_string = hlsl_type_to_string(ctx, src_type); + dst_string = hlsl_type_to_string(ctx, dst_type); + if (src_string && dst_string) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Can't implicitly convert from %s to %s.", 
src_string->buffer, dst_string->buffer); + hlsl_release_string_buffer(ctx, src_string); + hlsl_release_string_buffer(ctx, dst_string); + return NULL; + } + + if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", + src_type->type == HLSL_CLASS_VECTOR ? "vector" : "matrix"); + + if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) + return NULL; + list_add_tail(instrs, &cast->node.entry); + return &cast->node; +} + +static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, const struct vkd3d_shader_location loc) +{ + if (modifiers & mod) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, mod))) + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifier '%s' was already specified.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return modifiers; + } + if ((mod & HLSL_MODIFIERS_MAJORITY_MASK) && (modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) + { + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "'row_major' and 'column_major' modifiers are mutually exclusive."); + return modifiers; + } + return modifiers | mod; +} + +static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) +{ + struct hlsl_ir_node *condition, *not; + struct hlsl_ir_jump *jump; + struct hlsl_ir_if *iff; + + /* E.g. "for (i = 0; ; ++i)". 
*/ + if (list_empty(cond_list)) + return true; + + condition = node_from_list(cond_list); + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, condition->loc))) + return false; + list_add_tail(cond_list, ¬->entry); + + if (!(iff = hlsl_new_if(ctx, not, condition->loc))) + return false; + list_add_tail(cond_list, &iff->node.entry); + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, condition->loc))) + return false; + list_add_head(&iff->then_instrs.instrs, &jump->node.entry); + return true; +} + +enum loop_type +{ + LOOP_FOR, + LOOP_WHILE, + LOOP_DO_WHILE +}; + +static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, struct list *init, struct list *cond, + struct list *iter, struct list *body, struct vkd3d_shader_location loc) +{ + struct list *list = NULL; + struct hlsl_ir_loop *loop = NULL; + struct hlsl_ir_if *cond_jump = NULL; + + if (!(list = make_empty_list(ctx))) + goto oom; + + if (init) + list_move_head(list, init); + + if (!(loop = hlsl_new_loop(ctx, loc))) + goto oom; + list_add_tail(list, &loop->node.entry); + + if (!append_conditional_break(ctx, cond)) + goto oom; + + if (type != LOOP_DO_WHILE) + list_move_tail(&loop->body.instrs, cond); + + list_move_tail(&loop->body.instrs, body); + + if (iter) + list_move_tail(&loop->body.instrs, iter); + + if (type == LOOP_DO_WHILE) + list_move_tail(&loop->body.instrs, cond); + + vkd3d_free(init); + vkd3d_free(cond); + vkd3d_free(body); + return list; + +oom: + vkd3d_free(loop); + vkd3d_free(cond_jump); + vkd3d_free(list); + destroy_instr_list(init); + destroy_instr_list(cond); + destroy_instr_list(iter); + destroy_instr_list(body); + return NULL; +} + +static unsigned int initializer_size(const struct parse_initializer *initializer) +{ + unsigned int count = 0, i; + + for (i = 0; i < initializer->args_count; ++i) + { + count += hlsl_type_component_count(initializer->args[i]->data_type); + } + return count; +} + +static void free_parse_initializer(struct parse_initializer 
*initializer) +{ + destroy_instr_list(initializer->instrs); + vkd3d_free(initializer->args); +} + +static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, + struct vkd3d_shader_location *loc) +{ + unsigned int len = strlen(swizzle), component = 0; + unsigned int i, set, swiz = 0; + bool valid; + + if (value->data_type->type == HLSL_CLASS_MATRIX) + { + /* Matrix swizzle */ + bool m_swizzle; + unsigned int inc, x, y; + + if (len < 3 || swizzle[0] != '_') + return NULL; + m_swizzle = swizzle[1] == 'm'; + inc = m_swizzle ? 4 : 3; + + if (len % inc || len > inc * 4) + return NULL; + + for (i = 0; i < len; i += inc) + { + if (swizzle[i] != '_') + return NULL; + if (m_swizzle) + { + if (swizzle[i + 1] != 'm') + return NULL; + y = swizzle[i + 2] - '0'; + x = swizzle[i + 3] - '0'; + } + else + { + y = swizzle[i + 1] - '1'; + x = swizzle[i + 2] - '1'; + } + + if (x >= value->data_type->dimx || y >= value->data_type->dimy) + return NULL; + swiz |= (y << 4 | x) << component * 8; + component++; + } + return hlsl_new_swizzle(ctx, swiz, component, value, loc); + } + + /* Vector swizzle */ + if (len > 4) + return NULL; + + for (set = 0; set < 2; ++set) + { + valid = true; + component = 0; + for (i = 0; i < len; ++i) + { + char c[2][4] = {{'x', 'y', 'z', 'w'}, {'r', 'g', 'b', 'a'}}; + unsigned int s = 0; + + for (s = 0; s < 4; ++s) + { + if (swizzle[i] == c[set][s]) + break; + } + if (s == 4) + { + valid = false; + break; + } + + if (s >= value->data_type->dimx) + return NULL; + swiz |= s << component * 2; + component++; + } + if (valid) + return hlsl_new_swizzle(ctx, swiz, component, value, loc); + } + + return NULL; +} + +static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *return_value, struct vkd3d_shader_location loc) +{ + struct hlsl_type *return_type = ctx->cur_function->return_type; + struct hlsl_ir_jump *jump; + + if (return_value) + { + struct hlsl_ir_store 
*store; + + if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc))) + return NULL; + + if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) + return NULL; + list_add_after(&return_value->entry, &store->node.entry); + } + else if (ctx->cur_function->return_var) + { + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void function must return a value."); + return NULL; + } + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) + return NULL; + list_add_tail(instrs, &jump->node.entry); + + return jump; +} + +static struct hlsl_ir_load *add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node, + struct hlsl_ir_node *offset, struct hlsl_type *data_type, const struct vkd3d_shader_location loc) +{ + struct hlsl_ir_node *add = NULL; + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + + if (var_node->type == HLSL_IR_LOAD) + { + const struct hlsl_deref *src = &hlsl_ir_load(var_node)->src; + + var = src->var; + if (src->offset.node) + { + if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, src->offset.node, offset))) + return NULL; + list_add_tail(instrs, &add->entry); + offset = add; + } + } + else + { + struct hlsl_ir_store *store; + char name[27]; + + sprintf(name, "", var_node); + if (!(var = hlsl_new_synthetic_var(ctx, name, var_node->data_type, var_node->loc))) + return NULL; + + if (!(store = hlsl_new_simple_store(ctx, var, var_node))) + return NULL; + + list_add_tail(instrs, &store->node.entry); + } + + if (!(load = hlsl_new_load(ctx, var, offset, data_type, loc))) + return NULL; + list_add_tail(instrs, &load->node.entry); + return load; +} + +static struct hlsl_ir_load *add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, + const struct hlsl_struct_field *field, const struct vkd3d_shader_location loc) +{ + struct hlsl_ir_constant *c; + + if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset, loc))) + 
return NULL; + list_add_tail(instrs, &c->node.entry); + + return add_load(ctx, instrs, record, &c->node, field->type, loc); +} + +static struct hlsl_ir_load *add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, + struct hlsl_ir_node *index, const struct vkd3d_shader_location loc) +{ + const struct hlsl_type *expr_type = array->data_type; + struct hlsl_type *data_type; + struct hlsl_ir_constant *c; + struct hlsl_ir_node *mul; + + if (expr_type->type == HLSL_CLASS_ARRAY) + { + data_type = expr_type->e.array.type; + } + else if (expr_type->type == HLSL_CLASS_MATRIX || expr_type->type == HLSL_CLASS_VECTOR) + { + /* This needs to be lowered now, while we still have type information. */ + FIXME("Index of matrix or vector type.\n"); + return NULL; + } + else + { + if (expr_type->type == HLSL_CLASS_SCALAR) + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Scalar expressions cannot be array-indexed."); + else + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Expression cannot be array-indexed."); + return NULL; + } + + if (!(c = hlsl_new_uint_constant(ctx, data_type->reg_size, loc))) + return NULL; + list_add_tail(instrs, &c->node.entry); + if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, index, &c->node))) + return NULL; + list_add_tail(instrs, &mul->entry); + index = mul; + + return add_load(ctx, instrs, array, index, data_type, loc); +} + +static struct hlsl_struct_field *get_struct_field(struct list *fields, const char *name) +{ + struct hlsl_struct_field *f; + + LIST_FOR_EACH_ENTRY(f, fields, struct hlsl_struct_field, entry) + { + if (!strcmp(f->name, name)) + return f; + } + return NULL; +} + +static struct hlsl_type *apply_type_modifiers(struct hlsl_ctx *ctx, struct hlsl_type *type, + unsigned int *modifiers, struct vkd3d_shader_location loc) +{ + unsigned int default_majority = 0; + struct hlsl_type *new_type; + + /* This function is only used for declarations (i.e. 
variables and struct + * fields), which should inherit the matrix majority. We only explicitly set + * the default majority for declarations—typedefs depend on this—but we + * want to always set it, so that an hlsl_type object is never used to + * represent two different majorities (and thus can be used to store its + * register size, etc.) */ + if (!(*modifiers & HLSL_MODIFIERS_MAJORITY_MASK) + && !(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK) + && type->type == HLSL_CLASS_MATRIX) + { + if (ctx->matrix_majority == HLSL_COLUMN_MAJOR) + default_majority = HLSL_MODIFIER_COLUMN_MAJOR; + else + default_majority = HLSL_MODIFIER_ROW_MAJOR; + } + + if (!default_majority && !(*modifiers & HLSL_TYPE_MODIFIERS_MASK)) + return type; + + if (!(new_type = hlsl_type_clone(ctx, type, default_majority, *modifiers & HLSL_TYPE_MODIFIERS_MASK))) + return NULL; + + *modifiers &= ~HLSL_TYPE_MODIFIERS_MASK; + + if ((new_type->modifiers & HLSL_MODIFIER_ROW_MAJOR) && (new_type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)) + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "'row_major' and 'column_major' modifiers are mutually exclusive."); + + return new_type; +} + +static struct list *gen_struct_fields(struct hlsl_ctx *ctx, struct hlsl_type *type, struct list *fields) +{ + struct parse_variable_def *v, *v_next; + struct hlsl_struct_field *field; + struct list *list; + + if (type->type == HLSL_CLASS_MATRIX) + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + + if (!(list = make_empty_list(ctx))) + return NULL; + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, fields, struct parse_variable_def, entry) + { + unsigned int i; + + if (!(field = hlsl_alloc(ctx, sizeof(*field)))) + { + vkd3d_free(v); + return list; + } + + field->type = type; + for (i = 0; i < v->arrays.count; ++i) + field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[i]); + field->loc = v->loc; + field->name = v->name; + field->semantic = v->semantic; + if (v->initializer.args_count) + { + 
hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Illegal initializer on a struct field."); + free_parse_initializer(&v->initializer); + } + list_add_tail(list, &field->entry); + vkd3d_free(v); + } + vkd3d_free(fields); + return list; +} + +static bool add_typedef(struct hlsl_ctx *ctx, DWORD modifiers, struct hlsl_type *orig_type, struct list *list) +{ + struct parse_variable_def *v, *v_next; + struct hlsl_type *type; + unsigned int i; + bool ret; + + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, list, struct parse_variable_def, entry) + { + if (!v->arrays.count) + { + if (!(type = hlsl_type_clone(ctx, orig_type, 0, modifiers))) + return false; + } + else + { + type = orig_type; + } + + for (i = 0; i < v->arrays.count; ++i) + { + if (!(type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]))) + return false; + } + + vkd3d_free((void *)type->name); + type->name = v->name; + + if (type->type != HLSL_CLASS_MATRIX) + check_invalid_matrix_modifiers(ctx, type->modifiers, v->loc); + + if ((type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + && (type->modifiers & HLSL_MODIFIER_ROW_MAJOR)) + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "'row_major' and 'column_major' modifiers are mutually exclusive."); + + ret = hlsl_scope_add_type(ctx->cur_scope, type); + if (!ret) + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Type '%s' is already defined.", v->name); + vkd3d_free(v); + } + vkd3d_free(list); + return true; +} + +static bool add_func_parameter(struct hlsl_ctx *ctx, struct list *list, + struct parse_parameter *param, const struct vkd3d_shader_location loc) +{ + struct hlsl_ir_var *var; + + if (param->type->type == HLSL_CLASS_MATRIX) + assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + + if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Parameter '%s' is declared as both \"out\" and \"uniform\".", 
param->name); + + if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, ¶m->reg_reservation))) + return false; + var->is_param = 1; + + if (!hlsl_add_var(ctx, var, false)) + { + hlsl_free_var(var); + return false; + } + list_add_tail(list, &var->param_entry); + return true; +} + +static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) +{ + struct hlsl_reg_reservation reservation = {0}; + + if (!sscanf(reg_string + 1, "%u", &reservation.index)) + { + FIXME("Unsupported register reservation syntax.\n"); + return reservation; + } + reservation.type = reg_string[0]; + return reservation; +} + +static const struct hlsl_ir_function_decl *get_func_decl(struct rb_tree *funcs, char *name, struct list *params) +{ + struct hlsl_ir_function *func; + struct rb_entry *entry; + + if ((entry = rb_get(funcs, name))) + { + func = RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry); + + if ((entry = rb_get(&func->overloads, params))) + return RB_ENTRY_VALUE(entry, struct hlsl_ir_function_decl, entry); + } + return NULL; +} + +static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) +{ + struct list *list; + + if (!(list = make_empty_list(ctx))) + { + hlsl_free_instr(node); + return NULL; + } + list_add_tail(list, &node->entry); + return list; +} + +static unsigned int evaluate_array_dimension(struct hlsl_ir_node *node) +{ + if (node->data_type->type != HLSL_CLASS_SCALAR) + return 0; + + switch (node->type) + { + case HLSL_IR_CONSTANT: + { + struct hlsl_ir_constant *constant = hlsl_ir_constant(node); + const union hlsl_constant_value *value = &constant->value[0]; + + switch (constant->node.data_type->base_type) + { + case HLSL_TYPE_UINT: + return value->u; + case HLSL_TYPE_INT: + return value->i; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return value->f; + case HLSL_TYPE_DOUBLE: + return value->d; + case HLSL_TYPE_BOOL: + return value->b; + default: + assert(0); + return 0; + } + } + + case 
HLSL_IR_EXPR: + case HLSL_IR_LOAD: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_SWIZZLE: + FIXME("Unhandled type %s.\n", hlsl_node_type_to_string(node->type)); + return 0; + + case HLSL_IR_IF: + case HLSL_IR_JUMP: + case HLSL_IR_LOOP: + case HLSL_IR_STORE: + WARN("Invalid node type %s.\n", hlsl_node_type_to_string(node->type)); + return 0; + } + + assert(0); + return 0; +} + +static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) +{ + if (t1->base_type > HLSL_TYPE_LAST_SCALAR || t2->base_type > HLSL_TYPE_LAST_SCALAR) + return false; + + /* Scalar vars can be converted to pretty much everything */ + if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) + return true; + + if (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_VECTOR) + return true; + + if (t1->type == HLSL_CLASS_MATRIX || t2->type == HLSL_CLASS_MATRIX) + { + /* Matrix-vector conversion is apparently allowed if either they have the same components + count or the matrix is nx1 or 1xn */ + if (t1->type == HLSL_CLASS_VECTOR || t2->type == HLSL_CLASS_VECTOR) + { + if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) + return true; + + return (t1->type == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) + || (t2->type == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); + } + + /* Both matrices */ + if ((t1->dimx >= t2->dimx && t1->dimy >= t2->dimy) + || (t1->dimx <= t2->dimx && t1->dimy <= t2->dimy)) + return true; + } + + return false; +} + +static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hlsl_base_type t2) +{ + if (t1 > HLSL_TYPE_LAST_SCALAR || t2 > HLSL_TYPE_LAST_SCALAR) { + FIXME("Unexpected base type.\n"); + return HLSL_TYPE_FLOAT; + } + if (t1 == t2) + return t1 == HLSL_TYPE_BOOL ? 
HLSL_TYPE_INT : t1; + if (t1 == HLSL_TYPE_DOUBLE || t2 == HLSL_TYPE_DOUBLE) + return HLSL_TYPE_DOUBLE; + if (t1 == HLSL_TYPE_FLOAT || t2 == HLSL_TYPE_FLOAT + || t1 == HLSL_TYPE_HALF || t2 == HLSL_TYPE_HALF) + return HLSL_TYPE_FLOAT; + if (t1 == HLSL_TYPE_UINT || t2 == HLSL_TYPE_UINT) + return HLSL_TYPE_UINT; + return HLSL_TYPE_INT; +} + +static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct hlsl_type *t2, + const struct vkd3d_shader_location *loc, enum hlsl_type_class *type, unsigned int *dimx, unsigned int *dimy) +{ + if (t1->type > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, t1))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Expression of type \"%s\" cannot be used in a numeric expression.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (t2->type > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, t2))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Expression of type \"%s\" cannot be used in a numeric expression.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!expr_compatible_data_types(t1, t2)) + { + struct vkd3d_string_buffer *t1_string = hlsl_type_to_string(ctx, t1); + struct vkd3d_string_buffer *t2_string = hlsl_type_to_string(ctx, t2); + + if (t1_string && t2_string) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Expression data types \"%s\" and \"%s\" are incompatible.", + t1_string->buffer, t2_string->buffer); + hlsl_release_string_buffer(ctx, t1_string); + hlsl_release_string_buffer(ctx, t2_string); + return false; + } + + if (t1->dimx == 1 && t1->dimy == 1) + { + *type = t2->type; + *dimx = t2->dimx; + *dimy = t2->dimy; + } + else if (t2->dimx == 1 && t2->dimy == 1) + { + *type = t1->type; + *dimx = t1->dimx; + *dimy = t1->dimy; + } + else if (t1->type == 
HLSL_CLASS_MATRIX && t2->type == HLSL_CLASS_MATRIX) + { + *type = HLSL_CLASS_MATRIX; + *dimx = min(t1->dimx, t2->dimx); + *dimy = min(t1->dimy, t2->dimy); + } + else + { + if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) + { + *type = t1->type; + *dimx = t1->dimx; + *dimy = t1->dimy; + } + else + { + *type = t2->type; + *dimx = t2->dimx; + *dimy = t2->dimy; + } + } + + return true; +} + +static struct hlsl_ir_expr *add_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], + struct hlsl_type *type, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_expr *expr; + unsigned int i; + + if (!(expr = hlsl_alloc(ctx, sizeof(*expr)))) + return NULL; + init_node(&expr->node, HLSL_IR_EXPR, type, *loc); + expr->op = op; + for (i = 0; i < HLSL_MAX_OPERANDS; ++i) + hlsl_src_from_node(&expr->operands[i], operands[i]); + list_add_tail(instrs, &expr->node.entry); + + return expr; +} + +static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) +{ + const struct hlsl_type *type = instr->data_type; + struct vkd3d_string_buffer *string; + + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + break; + + default: + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Expression type '%s' is not integer.", string->buffer); + hlsl_release_string_buffer(ctx, string); + break; + } +} + +static struct hlsl_ir_expr *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; + + return add_expr(ctx, instrs, op, args, arg->data_type, loc); +} + +static struct hlsl_ir_expr *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct 
vkd3d_shader_location *loc) +{ + check_integer_type(ctx, arg); + + return add_unary_arithmetic_expr(ctx, instrs, op, arg, loc); +} + +static struct hlsl_ir_expr *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *bool_type; + + bool_type = hlsl_get_numeric_type(ctx, arg->data_type->type, HLSL_TYPE_BOOL, + arg->data_type->dimx, arg->data_type->dimy); + + if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) + return NULL; + + return add_expr(ctx, instrs, op, args, bool_type, loc); +} + +static struct hlsl_ir_expr *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *common_type; + enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); + enum hlsl_type_class type; + unsigned int dimx, dimy; + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + + if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) + return NULL; + + common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + + if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + return NULL; + + if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + return NULL; + + return add_expr(ctx, instrs, op, args, common_type, loc); +} + +static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, + enum hlsl_ir_expr_op op, struct vkd3d_shader_location loc) +{ + struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); + + list_move_tail(list1, list2); + vkd3d_free(list2); + add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, &loc); 
+ return list1; +} + +static struct hlsl_ir_expr *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) +{ + check_integer_type(ctx, arg1); + check_integer_type(ctx, arg2); + + return add_binary_arithmetic_expr(ctx, instrs, op, arg1, arg2, loc); +} + +static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); + + list_move_tail(list1, list2); + vkd3d_free(list2); + add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); + + return list1; +} + +static struct hlsl_ir_expr *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + struct vkd3d_shader_location *loc) +{ + struct hlsl_type *common_type, *return_type; + enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); + enum hlsl_type_class type; + unsigned int dimx, dimy; + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + + if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) + return NULL; + + common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); + + if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + return NULL; + + if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + return NULL; + + return add_expr(ctx, instrs, op, args, return_type, loc); +} + +static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, + enum hlsl_ir_expr_op op, struct vkd3d_shader_location loc) +{ + struct hlsl_ir_node 
*arg1 = node_from_list(list1), *arg2 = node_from_list(list2); + + list_move_tail(list1, list2); + vkd3d_free(list2); + add_binary_comparison_expr(ctx, list1, op, arg1, arg2, &loc); + return list1; +} + +static struct hlsl_ir_expr *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *common_type; + enum hlsl_type_class type; + unsigned int dimx, dimy; + + if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) + return NULL; + + common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); + + if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + return NULL; + + if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + return NULL; + + return add_expr(ctx, instrs, op, args, common_type, loc); +} + +static struct list *add_binary_logical_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); + + list_move_tail(list1, list2); + vkd3d_free(list2); + add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); + + return list1; +} + +static struct hlsl_ir_expr *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type base = arg1->data_type->base_type; + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *return_type, *integer_type; + enum hlsl_type_class type; + unsigned int dimx, dimy; + + check_integer_type(ctx, arg1); + check_integer_type(ctx, arg2); + + if (base == HLSL_TYPE_BOOL) + base = HLSL_TYPE_INT; + + if 
(!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) + return NULL; + + return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy); + + if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) + return NULL; + + if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) + return NULL; + + return add_expr(ctx, instrs, op, args, return_type, loc); +} + +static struct list *add_binary_shift_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); + + list_move_tail(list1, list2); + vkd3d_free(list2); + add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); + + return list1; +} + +static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) +{ + static const enum hlsl_ir_expr_op ops[] = + { + 0, + HLSL_OP2_ADD, + 0, + HLSL_OP2_MUL, + HLSL_OP2_DIV, + HLSL_OP2_MOD, + HLSL_OP2_LSHIFT, + HLSL_OP2_RSHIFT, + HLSL_OP2_BIT_AND, + HLSL_OP2_BIT_OR, + HLSL_OP2_BIT_XOR, + }; + + return ops[op]; +} + +static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsigned int *ret_width) +{ + unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; + + /* Apply the writemask to the swizzle to get a new writemask and swizzle. */ + for (i = 0; i < 4; ++i) + { + if (*writemask & (1 << i)) + { + unsigned int s = (*swizzle >> (i * 2)) & 3; + new_swizzle |= s << (bit++ * 2); + if (new_writemask & (1 << s)) + return false; + new_writemask |= 1 << s; + } + } + width = bit; + + /* Invert the swizzle. 
*/ + bit = 0; + for (i = 0; i < 4; ++i) + { + for (j = 0; j < width; ++j) + { + unsigned int s = (new_swizzle >> (j * 2)) & 3; + if (s == i) + inverted |= j << (bit++ * 2); + } + } + + *swizzle = inverted; + *writemask = new_writemask; + *ret_width = width; + return true; +} + +static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *lhs, + enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) +{ + struct hlsl_type *lhs_type = lhs->data_type; + struct hlsl_ir_store *store; + struct hlsl_ir_expr *copy; + unsigned int writemask = 0; + + if (assign_op == ASSIGN_OP_SUB) + { + struct hlsl_ir_expr *expr; + + if (!(expr = add_unary_arithmetic_expr(ctx, instrs, HLSL_OP1_NEG, rhs, &rhs->loc))) + return NULL; + rhs = &expr->node; + assign_op = ASSIGN_OP_ADD; + } + if (assign_op != ASSIGN_OP_ASSIGN) + { + enum hlsl_ir_expr_op op = op_from_assignment(assign_op); + struct hlsl_ir_expr *expr; + + assert(op); + if (!(expr = add_binary_arithmetic_expr(ctx, instrs, op, lhs, rhs, &rhs->loc))) + return NULL; + rhs = &expr->node; + } + + if (lhs_type->type <= HLSL_CLASS_LAST_NUMERIC) + { + writemask = (1 << lhs_type->dimx) - 1; + + if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) + return NULL; + } + + if (!(store = hlsl_alloc(ctx, sizeof(*store)))) + return NULL; + + while (lhs->type != HLSL_IR_LOAD) + { + if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) + { + hlsl_fixme(ctx, &lhs->loc, "Cast on the LHS."); + vkd3d_free(store); + return NULL; + } + else if (lhs->type == HLSL_IR_SWIZZLE) + { + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs), *new_swizzle; + unsigned int width, s = swizzle->swizzle; + + if (lhs->data_type->type == HLSL_CLASS_MATRIX) + hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); + + if (!invert_swizzle(&s, &writemask, &width)) + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); + 
vkd3d_free(store); + return NULL; + } + + if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) + { + vkd3d_free(store); + return NULL; + } + list_add_tail(instrs, &new_swizzle->node.entry); + + lhs = swizzle->val.node; + rhs = &new_swizzle->node; + } + else + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid lvalue."); + vkd3d_free(store); + return NULL; + } + } + + init_node(&store->node, HLSL_IR_STORE, NULL, lhs->loc); + store->writemask = writemask; + store->lhs.var = hlsl_ir_load(lhs)->src.var; + hlsl_src_from_node(&store->lhs.offset, hlsl_ir_load(lhs)->src.offset.node); + hlsl_src_from_node(&store->rhs, rhs); + list_add_tail(instrs, &store->node.entry); + + /* Don't use the instruction itself as a source, as this makes structure + * splitting easier. Instead copy it here. Since we retrieve sources from + * the last instruction in the list, we do need to copy. */ + if (!(copy = hlsl_new_copy(ctx, rhs))) + return NULL; + list_add_tail(instrs, ©->node.entry); + return ©->node; +} + +static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, + struct vkd3d_shader_location loc) +{ + struct hlsl_ir_node *lhs = node_from_list(instrs); + struct hlsl_ir_constant *one; + + if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, + "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in"); + + if (!(one = hlsl_new_int_constant(ctx, 1, loc))) + return false; + list_add_tail(instrs, &one->node.entry); + + if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, &one->node)) + return false; + + if (post) + { + struct hlsl_ir_expr *copy; + + if (!(copy = hlsl_new_copy(ctx, lhs))) + return false; + list_add_tail(instrs, ©->node.entry); + + /* Post increment/decrement expressions are considered const. 
*/ + if (!(copy->node.data_type = hlsl_type_clone(ctx, copy->node.data_type, 0, HLSL_MODIFIER_CONST))) + return false; + } + + return true; +} + +static void struct_var_initializer(struct hlsl_ctx *ctx, struct list *list, struct hlsl_ir_var *var, + struct parse_initializer *initializer) +{ + struct hlsl_type *type = var->data_type; + struct hlsl_struct_field *field; + unsigned int i = 0; + + if (initializer_size(initializer) != hlsl_type_component_count(type)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in initializer, but got %u.", + hlsl_type_component_count(type), initializer_size(initializer)); + free_parse_initializer(initializer); + return; + } + + list_move_tail(list, initializer->instrs); + vkd3d_free(initializer->instrs); + + LIST_FOR_EACH_ENTRY(field, type->e.elements, struct hlsl_struct_field, entry) + { + struct hlsl_ir_node *node = initializer->args[i]; + struct hlsl_ir_store *store; + struct hlsl_ir_constant *c; + + if (i++ >= initializer->args_count) + break; + + if (hlsl_type_component_count(field->type) == hlsl_type_component_count(node->data_type)) + { + if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset, node->loc))) + break; + list_add_tail(list, &c->node.entry); + + if (!(store = hlsl_new_store(ctx, var, &c->node, node, 0, node->loc))) + break; + list_add_tail(list, &store->node.entry); + } + else + { + hlsl_fixme(ctx, &node->loc, "Implicit cast in structure initializer."); + } + } + + vkd3d_free(initializer->args); +} + +static void free_parse_variable_def(struct parse_variable_def *v) +{ + free_parse_initializer(&v->initializer); + vkd3d_free(v->arrays.sizes); + vkd3d_free(v->name); + vkd3d_free((void *)v->semantic.name); + vkd3d_free(v); +} + +static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, + DWORD modifiers, struct list *var_list) +{ + struct parse_variable_def *v, *v_next; + struct hlsl_ir_function_decl *func; + struct list 
*statements_list; + struct hlsl_ir_var *var; + struct hlsl_type *type; + bool local = true; + + if (basic_type->type == HLSL_CLASS_MATRIX) + assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + + if (!(statements_list = make_empty_list(ctx))) + { + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + free_parse_variable_def(v); + vkd3d_free(var_list); + return NULL; + } + + if (!var_list) + return statements_list; + + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + { + unsigned int i; + + type = basic_type; + for (i = 0; i < v->arrays.count; ++i) + type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); + + if (type->type != HLSL_CLASS_MATRIX) + check_invalid_matrix_modifiers(ctx, modifiers, v->loc); + + if (!(var = hlsl_new_var(ctx, v->name, type, v->loc, &v->semantic, modifiers, &v->reg_reservation))) + { + free_parse_variable_def(v); + continue; + } + + var->buffer = ctx->cur_buffer; + + if (ctx->cur_scope == ctx->globals) + { + local = false; + + if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); + + /* Mark it as uniform. We need to do this here since synthetic + * variables also get put in the global scope, but shouldn't be + * considered uniforms, and we have no way of telling otherwise. 
*/ + if (!(modifiers & HLSL_STORAGE_STATIC)) + var->modifiers |= HLSL_STORAGE_UNIFORM; + + if ((func = hlsl_get_func_decl(ctx, var->name))) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "'%s' is already defined as a function.", var->name); + hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, + "'%s' was previously defined here.", var->name); + } + } + else + { + static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED + | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; + + if (modifiers & invalid) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers '%s' are not allowed on local variables.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + if (var->semantic.name) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Semantics are not allowed on local variables."); + } + + if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count + && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, + "Const variable \"%s\" is missing an initializer.", var->name); + hlsl_free_var(var); + vkd3d_free(v); + continue; + } + + if (!hlsl_add_var(ctx, var, local)) + { + struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); + + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Variable \"%s\" was already declared in this scope.", var->name); + hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); + hlsl_free_var(var); + vkd3d_free(v); + continue; + } + + if (v->initializer.args_count) + { + unsigned int size = initializer_size(&v->initializer); + struct hlsl_ir_load *load; + + if (type->type <= HLSL_CLASS_LAST_NUMERIC + && type->dimx * type->dimy != size && size != 1) + { + if (size < 
type->dimx * type->dimy) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in numeric initializer, but got %u.", + type->dimx * type->dimy, size); + free_parse_initializer(&v->initializer); + vkd3d_free(v); + continue; + } + } + if ((type->type == HLSL_CLASS_STRUCT || type->type == HLSL_CLASS_ARRAY) + && hlsl_type_component_count(type) != size) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in initializer, but got %u.", hlsl_type_component_count(type), size); + free_parse_initializer(&v->initializer); + vkd3d_free(v); + continue; + } + + if (type->type == HLSL_CLASS_STRUCT) + { + struct_var_initializer(ctx, statements_list, var, &v->initializer); + vkd3d_free(v); + continue; + } + if (type->type > HLSL_CLASS_LAST_NUMERIC) + { + FIXME("Initializers for non scalar/struct variables not supported yet.\n"); + free_parse_initializer(&v->initializer); + vkd3d_free(v); + continue; + } + if (v->arrays.count) + { + hlsl_fixme(ctx, &v->loc, "Array initializer."); + free_parse_initializer(&v->initializer); + vkd3d_free(v); + continue; + } + if (v->initializer.args_count > 1) + { + hlsl_fixme(ctx, &v->loc, "Complex initializer."); + free_parse_initializer(&v->initializer); + vkd3d_free(v); + continue; + } + + load = hlsl_new_var_load(ctx, var, var->loc); + list_add_tail(v->initializer.instrs, &load->node.entry); + add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); + vkd3d_free(v->initializer.args); + + if (modifiers & HLSL_STORAGE_STATIC) + list_move_tail(&ctx->static_initializers, v->initializer.instrs); + else + list_move_tail(statements_list, v->initializer.instrs); + vkd3d_free(v->initializer.instrs); + } + vkd3d_free(v); + } + vkd3d_free(var_list); + return statements_list; +} + +struct find_function_call_args +{ + const struct parse_initializer *params; + const struct hlsl_ir_function_decl *decl; +}; + +static void 
find_function_call_exact(struct rb_entry *entry, void *context) +{ + const struct hlsl_ir_function_decl *decl = RB_ENTRY_VALUE(entry, const struct hlsl_ir_function_decl, entry); + struct find_function_call_args *args = context; + const struct hlsl_ir_var *param; + unsigned int i = 0; + + LIST_FOR_EACH_ENTRY(param, decl->parameters, struct hlsl_ir_var, param_entry) + { + if (i >= args->params->args_count + || !hlsl_types_are_equal(param->data_type, args->params->args[i++]->data_type)) + return; + } + if (i == args->params->args_count) + args->decl = decl; +} + +static const struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, + const char *name, const struct parse_initializer *params) +{ + struct find_function_call_args args = {.params = params}; + struct hlsl_ir_function *func; + struct rb_entry *entry; + + if (!(entry = rb_get(&ctx->functions, name))) + return NULL; + func = RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry); + + rb_for_each_entry(&func->overloads, find_function_call_exact, &args); + if (!args.decl) + FIXME("Search for compatible overloads.\n"); + return args.decl; +} + +static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, + const struct parse_initializer *params, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *type = arg->data_type; + + if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) + return arg; + + type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + return add_implicit_conversion(ctx, params->instrs, arg, type, loc); +} + +static bool intrinsic_abs(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, params->args[0], loc); +} + +static bool intrinsic_clamp(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + 
struct hlsl_ir_expr *max; + + if (!(max = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, params->args[0], params->args[1], loc))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, &max->node, params->args[2], loc); +} + +static bool intrinsic_cross(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_swizzle *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; + struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1]; + struct hlsl_ir_node *arg1_cast, *arg2_cast, *mul1_neg; + struct hlsl_ir_expr *mul1, *mul2; + struct hlsl_type *cast_type; + enum hlsl_base_type base; + + if (arg1->data_type->base_type == HLSL_TYPE_HALF && arg2->data_type->base_type == HLSL_TYPE_HALF) + base = HLSL_TYPE_HALF; + else + base = HLSL_TYPE_FLOAT; + + cast_type = hlsl_get_vector_type(ctx, base, 3); + + if (!(arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc))) + return false; + + if (!(arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc))) + return false; + + if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) + return false; + list_add_tail(params->instrs, &arg1_swzl1->node.entry); + + if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) + return false; + list_add_tail(params->instrs, &arg2_swzl1->node.entry); + + if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, + &arg1_swzl1->node, &arg2_swzl1->node, loc))) + return false; + + if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &mul1->node, *loc))) + return false; + list_add_tail(params->instrs, &mul1_neg->entry); + + if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) + return false; + list_add_tail(params->instrs, &arg1_swzl2->node.entry); + + if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 
3, arg2_cast, loc))) + return false; + list_add_tail(params->instrs, &arg2_swzl2->node.entry); + + if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, + &arg1_swzl2->node, &arg2_swzl2->node, loc))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, &mul2->node, mul1_neg, loc); +} + +static bool intrinsic_floor(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); +} + +static bool intrinsic_max(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, params->args[0], params->args[1], loc); +} + +static bool intrinsic_min(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, params->args[0], params->args[1], loc); +} + +static bool intrinsic_pow(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *log, *exp; + struct hlsl_ir_expr *mul; + + if (!(log = hlsl_new_unary_expr(ctx, HLSL_OP1_LOG2, params->args[0], *loc))) + return false; + list_add_tail(params->instrs, &log->entry); + + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, params->args[1], log, loc))) + return false; + + if (!(exp = hlsl_new_unary_expr(ctx, HLSL_OP1_EXP2, &mul->node, *loc))) + return false; + list_add_tail(params->instrs, &exp->entry); + return true; +} + +static bool intrinsic_round(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if 
(!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ROUND, arg, loc); +} + +static bool intrinsic_saturate(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); +} + +static const struct intrinsic_function +{ + const char *name; + int param_count; + bool check_numeric; + bool (*handler)(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc); +} +intrinsic_functions[] = +{ + /* Note: these entries should be kept in alphabetical order. */ + {"abs", 1, true, intrinsic_abs}, + {"clamp", 3, true, intrinsic_clamp}, + {"cross", 2, true, intrinsic_cross}, + {"floor", 1, true, intrinsic_floor}, + {"max", 2, true, intrinsic_max}, + {"min", 2, true, intrinsic_min}, + {"pow", 2, true, intrinsic_pow}, + {"round", 1, true, intrinsic_round}, + {"saturate", 1, true, intrinsic_saturate}, +}; + +static int intrinsic_function_name_compare(const void *a, const void *b) +{ + const struct intrinsic_function *func = b; + + return strcmp(a, func->name); +} + +static struct list *add_call(struct hlsl_ctx *ctx, const char *name, + struct parse_initializer *params, struct vkd3d_shader_location loc) +{ + const struct hlsl_ir_function_decl *decl; + struct intrinsic_function *intrinsic; + + if ((decl = find_function_call(ctx, name, params))) + { + hlsl_fixme(ctx, &loc, "Call to user-defined function \"%s\".", name); + free_parse_initializer(params); + return NULL; + } + else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), + sizeof(*intrinsic_functions), intrinsic_function_name_compare))) + { + if (intrinsic->param_count >= 0 && 
params->args_count != intrinsic->param_count) + { + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to function '%s': expected %u, but got %u.\n", + name, intrinsic->param_count, params->args_count); + free_parse_initializer(params); + return NULL; + } + + if (intrinsic->check_numeric) + { + unsigned int i; + + for (i = 0; i < params->args_count; ++i) + { + if (params->args[i]->data_type->type > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, params->args[i]->data_type))) + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument %u of '%s': expected a numeric type, but got '%s'.\n", + i + 1, name, string->buffer); + hlsl_release_string_buffer(ctx, string); + free_parse_initializer(params); + return NULL; + } + } + } + + if (!intrinsic->handler(ctx, params, &loc)) + { + free_parse_initializer(params); + return NULL; + } + } + else + { + hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Function \"%s\" is not defined.", name); + free_parse_initializer(params); + return NULL; + } + vkd3d_free(params->args); + return params->instrs; +} + +static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, + struct parse_initializer *params, struct vkd3d_shader_location loc) +{ + unsigned int i, writemask_offset = 0; + struct hlsl_ir_store *store; + static unsigned int counter; + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + char name[23]; + + if (type->type == HLSL_CLASS_MATRIX) + hlsl_fixme(ctx, &loc, "Matrix constructor."); + + sprintf(name, "", counter++); + if (!(var = hlsl_new_synthetic_var(ctx, name, type, loc))) + return NULL; + + for (i = 0; i < params->args_count; ++i) + { + struct hlsl_ir_node *arg = params->args[i]; + unsigned int width; + + if (arg->data_type->type == HLSL_CLASS_OBJECT) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, 
arg->data_type))) + hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Invalid type %s for constructor argument.", string->buffer); + hlsl_release_string_buffer(ctx, string); + continue; + } + width = hlsl_type_component_count(arg->data_type); + + if (width > 4) + { + FIXME("Constructor argument with %u components.\n", width); + continue; + } + + if (!(arg = add_implicit_conversion(ctx, params->instrs, arg, + hlsl_get_vector_type(ctx, type->base_type, width), &arg->loc))) + continue; + + if (!(store = hlsl_new_store(ctx, var, NULL, arg, + ((1u << width) - 1) << writemask_offset, arg->loc))) + return NULL; + list_add_tail(params->instrs, &store->node.entry); + + writemask_offset += width; + } + + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return NULL; + list_add_tail(params->instrs, &load->node.entry); + + vkd3d_free(params->args); + return params->instrs; +} + +static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + struct hlsl_ir_load *object_load; + + if (object_type->type != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, object_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Type '%s' does not have methods.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + /* Only HLSL_IR_LOAD can return an object. 
*/ + object_load = hlsl_ir_load(object); + + if (!strcmp(name, "Load") + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) + { + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + struct hlsl_ir_resource_load *load; + struct hlsl_ir_node *coords; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + FIXME("'Load' method for multi-sample textures.\n"); + return false; + } + + if (params->args_count < 1 || params->args_count > 3) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method 'Load': expected 1, 2, or 3, but got %u.", params->args_count); + return false; + } + if (params->args_count >= 2) + hlsl_fixme(ctx, loc, "Offset parameter."); + if (params->args_count == 3) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + + /* +1 for the mipmap level */ + if (!(coords = add_implicit_conversion(ctx, instrs, params->args[0], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc))) + return false; + + if (!(load = hlsl_new_resource_load(ctx, object_type->e.resource_format, HLSL_RESOURCE_LOAD, + object_load->src.var, object_load->src.offset.node, NULL, NULL, coords, NULL, loc))) + return false; + list_add_tail(instrs, &load->node.entry); + return true; + } + else if (!strcmp(name, "Sample") + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) + { + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const struct hlsl_type *sampler_type; + struct hlsl_ir_resource_load *load; + struct hlsl_ir_node *offset = NULL; + struct hlsl_ir_load *sampler_load; + struct hlsl_ir_node *coords; + + if (params->args_count != 2 && params->args_count != 3) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of 
arguments to method 'Sample': expected 2 or 3, but got %u.", params->args_count); + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + /* Only HLSL_IR_LOAD can return an object. */ + sampler_load = hlsl_ir_load(params->args[0]); + + if (!(coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (params->args_count == 3) + { + if (!(offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim), loc))) + return false; + } + + if (!(load = hlsl_new_resource_load(ctx, object_type->e.resource_format, + HLSL_RESOURCE_SAMPLE, object_load->src.var, object_load->src.offset.node, + sampler_load->src.var, sampler_load->src.offset.node, coords, offset, loc))) + return false; + list_add_tail(instrs, &load->node.entry); + return true; + } + else if ((!strcmp(name, "Gather") || !strcmp(name, "GatherRed") || !strcmp(name, "GatherBlue") + || !strcmp(name, "GatherGreen") || !strcmp(name, "GatherAlpha")) + && (object_type->sampler_dim == HLSL_SAMPLER_DIM_2D + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DARRAY + || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY)) + { + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + enum hlsl_resource_load_type load_type; + const struct hlsl_type *sampler_type; + struct hlsl_ir_resource_load *load; + struct 
hlsl_ir_node *offset = NULL; + struct hlsl_ir_load *sampler_load; + struct hlsl_type *result_type; + struct hlsl_ir_node *coords; + unsigned int read_channel; + + if (!strcmp(name, "GatherGreen")) + { + load_type = HLSL_RESOURCE_GATHER_GREEN; + read_channel = 1; + } + else if (!strcmp(name, "GatherBlue")) + { + load_type = HLSL_RESOURCE_GATHER_BLUE; + read_channel = 2; + } + else if (!strcmp(name, "GatherAlpha")) + { + load_type = HLSL_RESOURCE_GATHER_ALPHA; + read_channel = 3; + } + else + { + load_type = HLSL_RESOURCE_GATHER_RED; + read_channel = 0; + } + + if (!strcmp(name, "Gather")) + { + if (params->args_count != 2 && params->args_count != 3) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method 'Gather': expected 2 or 3, but got %u.", params->args_count); + return false; + } + } + else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", + name, params->args_count); + return false; + } + + if (params->args_count == 4 || params->args_count == 7) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + + if (params->args_count == 6 || params->args_count == 7) + hlsl_fixme(ctx, loc, "Multiple Gather() offset parameters."); + + if (params->args_count == 3 || params->args_count == 4) + { + if (!(offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim), loc))) + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for 
argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (read_channel >= object_type->e.resource_format->dimx) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Method %s() requires at least %u channels.", name, read_channel + 1); + return false; + } + + result_type = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); + + /* Only HLSL_IR_LOAD can return an object. */ + sampler_load = hlsl_ir_load(params->args[0]); + + if (!(coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (!(load = hlsl_new_resource_load(ctx, result_type, + load_type, object_load->src.var, object_load->src.offset.node, + sampler_load->src.var, sampler_load->src.offset.node, coords, offset, loc))) + return false; + list_add_tail(instrs, &load->node.entry); + return true; + } + else + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, object_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Method '%s' is not defined on type '%s'.", name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } +} + +} + +%locations +%define parse.error verbose +%define api.prefix {hlsl_yy} +%define api.pure full +%expect 1 +%lex-param {yyscan_t scanner} +%parse-param {void *scanner} +%parse-param {struct hlsl_ctx *ctx} + +%union +{ + struct hlsl_type *type; + INT intval; + FLOAT floatval; + BOOL boolval; + char *name; + DWORD modifiers; + struct hlsl_ir_node *instr; + struct list *list; + struct parse_function function; + struct parse_parameter parameter; + struct parse_initializer initializer; + struct parse_array_sizes arrays; + struct parse_variable_def *variable_def; + struct parse_if_body if_body; + enum parse_assign_op assign_op; + struct hlsl_reg_reservation reg_reservation; + struct 
parse_colon_attribute colon_attribute; + struct hlsl_semantic semantic; + enum hlsl_buffer_type buffer_type; + enum hlsl_sampler_dim sampler_dim; +} + +%token KW_BLENDSTATE +%token KW_BREAK +%token KW_BUFFER +%token KW_CBUFFER +%token KW_COLUMN_MAJOR +%token KW_COMPILE +%token KW_CONST +%token KW_CONTINUE +%token KW_DEPTHSTENCILSTATE +%token KW_DEPTHSTENCILVIEW +%token KW_DISCARD +%token KW_DO +%token KW_DOUBLE +%token KW_ELSE +%token KW_EXTERN +%token KW_FALSE +%token KW_FOR +%token KW_GEOMETRYSHADER +%token KW_GROUPSHARED +%token KW_IF +%token KW_IN +%token KW_INLINE +%token KW_INOUT +%token KW_MATRIX +%token KW_NAMESPACE +%token KW_NOINTERPOLATION +%token KW_OUT +%token KW_PASS +%token KW_PIXELSHADER +%token KW_PRECISE +%token KW_RASTERIZERSTATE +%token KW_RENDERTARGETVIEW +%token KW_RETURN +%token KW_REGISTER +%token KW_ROW_MAJOR +%token KW_SAMPLER +%token KW_SAMPLER1D +%token KW_SAMPLER2D +%token KW_SAMPLER3D +%token KW_SAMPLERCUBE +%token KW_SAMPLER_STATE +%token KW_SAMPLERCOMPARISONSTATE +%token KW_SHARED +%token KW_STATEBLOCK +%token KW_STATEBLOCK_STATE +%token KW_STATIC +%token KW_STRING +%token KW_STRUCT +%token KW_SWITCH +%token KW_TBUFFER +%token KW_TECHNIQUE +%token KW_TECHNIQUE10 +%token KW_TEXTURE +%token KW_TEXTURE1D +%token KW_TEXTURE1DARRAY +%token KW_TEXTURE2D +%token KW_TEXTURE2DARRAY +%token KW_TEXTURE2DMS +%token KW_TEXTURE2DMSARRAY +%token KW_TEXTURE3D +%token KW_TEXTURECUBE +%token KW_TEXTURECUBEARRAY +%token KW_TRUE +%token KW_TYPEDEF +%token KW_UNIFORM +%token KW_VECTOR +%token KW_VERTEXSHADER +%token KW_VOID +%token KW_VOLATILE +%token KW_WHILE + +%token OP_INC +%token OP_DEC +%token OP_AND +%token OP_OR +%token OP_EQ +%token OP_LEFTSHIFT +%token OP_LEFTSHIFTASSIGN +%token OP_RIGHTSHIFT +%token OP_RIGHTSHIFTASSIGN +%token OP_ELLIPSIS +%token OP_LE +%token OP_GE +%token OP_NE +%token OP_ADDASSIGN +%token OP_SUBASSIGN +%token OP_MULASSIGN +%token OP_DIVASSIGN +%token OP_MODASSIGN +%token OP_ANDASSIGN +%token OP_ORASSIGN +%token OP_XORASSIGN 
+%token OP_UNKNOWN1 +%token OP_UNKNOWN2 +%token OP_UNKNOWN3 +%token OP_UNKNOWN4 + +%token C_FLOAT + +%token C_INTEGER +%token PRE_LINE + +%type add_expr +%type assignment_expr +%type bitand_expr +%type bitor_expr +%type bitxor_expr +%type compound_statement +%type conditional_expr +%type declaration +%type declaration_statement +%type equality_expr +%type expr +%type expr_statement +%type field +%type fields_list +%type initializer_expr +%type jump_statement +%type logicand_expr +%type logicor_expr +%type loop_statement +%type mul_expr +%type param_list +%type parameters +%type postfix_expr +%type primary_expr +%type relational_expr +%type selection_statement +%type shift_expr +%type statement +%type statement_list +%type struct_declaration +%type type_specs +%type unary_expr +%type variables_def +%type variables_def_optional + +%token VAR_IDENTIFIER +%token NEW_IDENTIFIER +%token STRING +%token TYPE_IDENTIFIER + +%type arrays + +%type assign_op + +%type boolean + +%type buffer_type + +%type colon_attribute + +%type func_declaration +%type func_prototype + +%type complex_initializer +%type func_arguments +%type initializer_expr_list + +%type if_body + +%type input_mod +%type input_mods +%type var_modifiers + +%type any_identifier +%type var_identifier + +%type parameter + +%type register_opt + +%type texture_type + +%type semantic + +%type field_type +%type named_struct_spec +%type unnamed_struct_spec +%type struct_spec +%type type +%type typedef_type + +%type type_spec +%type variable_decl +%type variable_def + +%% + +hlsl_prog: + %empty + | hlsl_prog func_declaration + { + const struct hlsl_ir_function_decl *decl; + + decl = get_func_decl(&ctx->functions, $2.name, $2.decl->parameters); + if (decl && !decl->func->intrinsic) + { + if (decl->has_body && $2.decl->has_body) + { + hlsl_error(ctx, &$2.decl->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Function \"%s\" is already defined.", $2.name); + hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was 
previously defined here.", $2.name); + YYABORT; + } + else if (!hlsl_types_are_equal(decl->return_type, $2.decl->return_type)) + { + hlsl_error(ctx, &$2.decl->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Function \"%s\" was already declared with a different return type.", $2.name); + hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", $2.name); + YYABORT; + } + } + + hlsl_add_function(ctx, $2.name, $2.decl, false); + } + | hlsl_prog buffer_declaration buffer_body + | hlsl_prog declaration_statement + { + if (!list_empty($2)) + hlsl_fixme(ctx, &@2, "Uniform initializer."); + destroy_instr_list($2); + } + | hlsl_prog preproc_directive + | hlsl_prog ';' + +buffer_declaration: + buffer_type any_identifier colon_attribute + { + if ($3.semantic.name) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); + + if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, @2))) + YYABORT; + } + +buffer_body: + '{' declaration_statement_list '}' + { + ctx->cur_buffer = ctx->globals_buffer; + } + +buffer_type: + KW_CBUFFER + { + $$ = HLSL_BUFFER_CONSTANT; + } + | KW_TBUFFER + { + $$ = HLSL_BUFFER_TEXTURE; + } + +declaration_statement_list: + declaration_statement + | declaration_statement_list declaration_statement + +preproc_directive: + PRE_LINE STRING + { + const char **new_array = NULL; + + ctx->location.line = $1; + if (strcmp($2, ctx->location.source_name)) + new_array = hlsl_realloc(ctx, ctx->source_files, + sizeof(*ctx->source_files) * (ctx->source_files_count + 1)); + + if (new_array) + { + ctx->source_files = new_array; + ctx->source_files[ctx->source_files_count++] = $2; + ctx->location.source_name = $2; + } + else + { + vkd3d_free($2); + } + } + +struct_declaration: + var_modifiers struct_spec variables_def_optional ';' + { + struct hlsl_type *type; + unsigned int modifiers = $1; + + if (!$3) + { + if (!$2->name) + hlsl_error(ctx, &@2, 
VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Anonymous struct type must declare a variable."); + if (modifiers) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on struct type declarations."); + } + + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, @1))) + YYABORT; + $$ = declare_vars(ctx, type, modifiers, $3); + } + +struct_spec: + named_struct_spec + | unnamed_struct_spec + +named_struct_spec: + KW_STRUCT any_identifier '{' fields_list '}' + { + bool ret; + + $$ = hlsl_new_struct_type(ctx, $2, $4); + + if (hlsl_get_var(ctx->cur_scope, $2)) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "\"%s\" is already declared as a variable.", $2); + YYABORT; + } + + ret = hlsl_scope_add_type(ctx->cur_scope, $$); + if (!ret) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "Struct \"%s\" is already defined.", $2); + YYABORT; + } + } + +unnamed_struct_spec: + KW_STRUCT '{' fields_list '}' + { + $$ = hlsl_new_struct_type(ctx, NULL, $3); + } + +any_identifier: + VAR_IDENTIFIER + | TYPE_IDENTIFIER + | NEW_IDENTIFIER + +fields_list: + %empty + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + } + | fields_list field + { + struct hlsl_struct_field *field, *next, *existing; + + $$ = $1; + LIST_FOR_EACH_ENTRY_SAFE(field, next, $2, struct hlsl_struct_field, entry) + { + if ((existing = get_struct_field($$, field->name))) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Field \"%s\" is already defined.", field->name); + hlsl_note(ctx, &existing->loc, VKD3D_SHADER_LOG_ERROR, + "'%s' was previously defined here.", field->name); + vkd3d_free(field); + } + else + { + list_add_tail($$, &field->entry); + } + } + vkd3d_free($2); + } + +field_type: + type + | unnamed_struct_spec + +field: + var_modifiers field_type variables_def ';' + { + struct hlsl_type *type; + unsigned int modifiers = $1; + + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, @1))) + YYABORT; + if (modifiers) + { + struct 
vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, modifiers))) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers '%s' are not allowed on struct fields.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + $$ = gen_struct_fields(ctx, type, $3); + } + +func_declaration: + func_prototype compound_statement + { + $$ = $1; + $$.decl->has_body = true; + list_move_tail(&$$.decl->body.instrs, $2); + vkd3d_free($2); + hlsl_pop_scope(ctx); + } + | func_prototype ';' + { + $$ = $1; + hlsl_pop_scope(ctx); + } + +func_prototype: + /* var_modifiers is necessary to avoid shift/reduce conflicts. */ + var_modifiers type var_identifier '(' parameters ')' colon_attribute + { + struct hlsl_ir_var *var; + + if ($1) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on functions."); + YYABORT; + } + if ((var = hlsl_get_var(ctx->globals, $3))) + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "\"%s\" is already declared as a variable.", $3); + hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, + "\"%s\" was previously declared here.", $3); + YYABORT; + } + if (hlsl_types_are_equal($2, ctx->builtin_types.Void) && $7.semantic.name) + { + hlsl_error(ctx, &@7, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Semantics are not allowed on void functions."); + } + + if ($7.reg_reservation.type) + FIXME("Unexpected register reservation for a function.\n"); + + if (!($$.decl = hlsl_new_func_decl(ctx, $2, $5, &$7.semantic, @3))) + YYABORT; + $$.name = $3; + ctx->cur_function = $$.decl; + } + +compound_statement: + '{' '}' + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + } + | '{' scope_start statement_list '}' + { + hlsl_pop_scope(ctx); + $$ = $3; + } + +scope_start: + %empty + { + hlsl_push_scope(ctx); + } + +var_identifier: + VAR_IDENTIFIER + | NEW_IDENTIFIER + +colon_attribute: + %empty + { + $$.semantic.name = NULL; + $$.reg_reservation.type = 0; + } + | semantic + 
{ + $$.semantic = $1; + $$.reg_reservation.type = 0; + } + | register_opt + { + $$.semantic.name = NULL; + $$.reg_reservation = $1; + } + +semantic: + ':' any_identifier + { + char *p; + + for (p = $2 + strlen($2); p > $2 && isdigit(p[-1]); --p) + ; + $$.name = $2; + $$.index = atoi(p); + *p = 0; + } + +/* FIXME: Writemasks */ +register_opt: + ':' KW_REGISTER '(' any_identifier ')' + { + $$ = parse_reg_reservation($4); + vkd3d_free($4); + } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' + { + FIXME("Ignoring shader target %s in a register reservation.\n", debugstr_a($4)); + vkd3d_free($4); + + $$ = parse_reg_reservation($6); + vkd3d_free($6); + } + +parameters: + scope_start + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + } + | scope_start param_list + { + $$ = $2; + } + +param_list: + parameter + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + if (!add_func_parameter(ctx, $$, &$1, @1)) + { + ERR("Error adding function parameter %s.\n", $1.name); + YYABORT; + } + } + | param_list ',' parameter + { + $$ = $1; + if (!add_func_parameter(ctx, $$, &$3, @3)) + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Parameter \"%s\" is already declared.", $3.name); + YYABORT; + } + } + +parameter: + input_mods var_modifiers type any_identifier colon_attribute + { + struct hlsl_type *type; + unsigned int modifiers = $2; + + if (!(type = apply_type_modifiers(ctx, $3, &modifiers, @2))) + YYABORT; + + $$.modifiers = $1 ? 
$1 : HLSL_STORAGE_IN; + $$.modifiers |= modifiers; + $$.type = type; + $$.name = $4; + $$.semantic = $5.semantic; + $$.reg_reservation = $5.reg_reservation; + } + +input_mods: + %empty + { + $$ = 0; + } + | input_mods input_mod + { + if ($1 & $2) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, $2))) + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifier \"%s\" was already specified.", string->buffer); + hlsl_release_string_buffer(ctx, string); + YYABORT; + } + $$ = $1 | $2; + } + +input_mod: + KW_IN + { + $$ = HLSL_STORAGE_IN; + } + | KW_OUT + { + $$ = HLSL_STORAGE_OUT; + } + | KW_INOUT + { + $$ = HLSL_STORAGE_IN | HLSL_STORAGE_OUT; + } + +texture_type: + KW_TEXTURE1D + { + $$ = HLSL_SAMPLER_DIM_1D; + } + | KW_TEXTURE2D + { + $$ = HLSL_SAMPLER_DIM_2D; + } + | KW_TEXTURE3D + { + $$ = HLSL_SAMPLER_DIM_3D; + } + | KW_TEXTURECUBE + { + $$ = HLSL_SAMPLER_DIM_CUBE; + } + | KW_TEXTURE1DARRAY + { + $$ = HLSL_SAMPLER_DIM_1DARRAY; + } + | KW_TEXTURE2DARRAY + { + $$ = HLSL_SAMPLER_DIM_2DARRAY; + } + | KW_TEXTURECUBEARRAY + { + $$ = HLSL_SAMPLER_DIM_CUBEARRAY; + } + +type: + KW_VECTOR '<' type ',' C_INTEGER '>' + { + if ($3->type != HLSL_CLASS_SCALAR) + { + struct vkd3d_string_buffer *string; + + string = hlsl_type_to_string(ctx, $3); + if (string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Vector base type %s is not scalar.", string->buffer); + hlsl_release_string_buffer(ctx, string); + YYABORT; + } + if ($5 < 1 || $5 > 4) + { + hlsl_error(ctx, &@5, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, + "Vector size %d is not between 1 and 4.", $5); + YYABORT; + } + + $$ = hlsl_get_vector_type(ctx, $3->base_type, $5); + } + | KW_VECTOR + { + $$ = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + } + | KW_MATRIX '<' type ',' C_INTEGER ',' C_INTEGER '>' + { + if ($3->type != HLSL_CLASS_SCALAR) + { + struct vkd3d_string_buffer *string; + + string = hlsl_type_to_string(ctx, $3); + if (string) + hlsl_error(ctx, 
&@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Matrix base type %s is not scalar.", string->buffer); + hlsl_release_string_buffer(ctx, string); + YYABORT; + } + if ($5 < 1 || $5 > 4) + { + hlsl_error(ctx, &@5, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, + "Matrix row count %d is not between 1 and 4.", $5); + YYABORT; + } + if ($7 < 1 || $7 > 4) + { + hlsl_error(ctx, &@7, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, + "Matrix column count %d is not between 1 and 4.", $7); + YYABORT; + } + + $$ = hlsl_get_matrix_type(ctx, $3->base_type, $7, $5); + } + | KW_MATRIX + { + $$ = hlsl_get_matrix_type(ctx, HLSL_TYPE_FLOAT, 4, 4); + } + | KW_VOID + { + $$ = ctx->builtin_types.Void; + } + | KW_SAMPLER + { + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC]; + } + | KW_SAMPLER1D + { + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_1D]; + } + | KW_SAMPLER2D + { + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_2D]; + } + | KW_SAMPLER3D + { + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_3D]; + } + | KW_SAMPLERCUBE + { + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_3D]; + } + | KW_TEXTURE + { + $$ = hlsl_new_texture_type(ctx, HLSL_SAMPLER_DIM_GENERIC, NULL); + } + | texture_type + { + $$ = hlsl_new_texture_type(ctx, $1, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4)); + } + | texture_type '<' type '>' + { + if ($3->type > HLSL_CLASS_VECTOR) + { + struct vkd3d_string_buffer *string; + + string = hlsl_type_to_string(ctx, $3); + if (string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Texture data type %s is not scalar or vector.\n", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + $$ = hlsl_new_texture_type(ctx, $1, $3); + } + | TYPE_IDENTIFIER + { + $$ = hlsl_get_type(ctx->cur_scope, $1, true); + vkd3d_free($1); + } + | KW_STRUCT TYPE_IDENTIFIER + { + $$ = hlsl_get_type(ctx->cur_scope, $2, true); + if ($$->type != HLSL_CLASS_STRUCT) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "\"%s\" redefined as a structure.", $2); + 
vkd3d_free($2); + } + +declaration_statement: + declaration + | struct_declaration + | typedef + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + } + +typedef_type: + type + | struct_spec + +typedef: + KW_TYPEDEF var_modifiers typedef_type type_specs ';' + { + if ($2 & ~HLSL_TYPE_MODIFIERS_MASK) + { + struct parse_variable_def *v, *v_next; + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Storage modifiers are not allowed on typedefs."); + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, $4, struct parse_variable_def, entry) + vkd3d_free(v); + vkd3d_free($4); + YYABORT; + } + if (!add_typedef(ctx, $2, $3, $4)) + YYABORT; + } + +type_specs: + type_spec + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + list_add_head($$, &$1->entry); + } + | type_specs ',' type_spec + { + $$ = $1; + list_add_tail($$, &$3->entry); + } + +type_spec: + any_identifier arrays + { + $$ = hlsl_alloc(ctx, sizeof(*$$)); + $$->loc = @1; + $$->name = $1; + $$->arrays = $2; + } + +declaration: + var_modifiers type variables_def ';' + { + struct hlsl_type *type; + unsigned int modifiers = $1; + + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, @1))) + YYABORT; + $$ = declare_vars(ctx, type, modifiers, $3); + } + +variables_def_optional: + %empty + { + $$ = NULL; + } + | variables_def + +variables_def: + variable_def + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + list_add_head($$, &$1->entry); + } + | variables_def ',' variable_def + { + $$ = $1; + list_add_tail($$, &$3->entry); + } + +variable_decl: + any_identifier arrays colon_attribute + { + $$ = hlsl_alloc(ctx, sizeof(*$$)); + $$->loc = @1; + $$->name = $1; + $$->arrays = $2; + $$->semantic = $3.semantic; + $$->reg_reservation = $3.reg_reservation; + } + +state: + any_identifier '=' expr ';' + { + vkd3d_free($1); + hlsl_free_instr_list($3); + } + +state_block_start: + %empty + { + ctx->in_state_block = 1; + } + +state_block: + %empty + | state_block state + +variable_def: + variable_decl + | variable_decl '=' 
complex_initializer + { + $$ = $1; + $$->initializer = $3; + } + | variable_decl '{' state_block_start state_block '}' + { + $$ = $1; + ctx->in_state_block = 0; + } + +arrays: + %empty + { + $$.sizes = NULL; + $$.count = 0; + } + | '[' expr ']' arrays + { + unsigned int size = evaluate_array_dimension(node_from_list($2)); + uint32_t *new_array; + + destroy_instr_list($2); + + $$ = $4; + + if (!size) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, + "Array size is not a positive integer constant."); + vkd3d_free($$.sizes); + YYABORT; + } + + if (size > 65536) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, + "Array size %u is not between 1 and 65536.", size); + vkd3d_free($$.sizes); + YYABORT; + } + + if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) + { + vkd3d_free($$.sizes); + YYABORT; + } + $$.sizes = new_array; + $$.sizes[$$.count++] = size; + } + +var_modifiers: + %empty + { + $$ = 0; + } + | KW_EXTERN var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, @1); + } + | KW_NOINTERPOLATION var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, @1); + } + | KW_PRECISE var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, @1); + } + | KW_SHARED var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, @1); + } + | KW_GROUPSHARED var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, @1); + } + | KW_STATIC var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, @1); + } + | KW_UNIFORM var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, @1); + } + | KW_VOLATILE var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_VOLATILE, @1); + } + | KW_CONST var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, @1); + } + | KW_ROW_MAJOR var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, @1); + } + | KW_COLUMN_MAJOR var_modifiers + { + $$ = 
add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, @1); + } + +complex_initializer: + initializer_expr + { + $$.args_count = 1; + if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) + YYABORT; + $$.args[0] = node_from_list($1); + $$.instrs = $1; + } + | '{' initializer_expr_list '}' + { + $$ = $2; + } + | '{' initializer_expr_list ',' '}' + { + $$ = $2; + } + +initializer_expr: + assignment_expr + +initializer_expr_list: + initializer_expr + { + $$.args_count = 1; + if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) + YYABORT; + $$.args[0] = node_from_list($1); + $$.instrs = $1; + } + | initializer_expr_list ',' initializer_expr + { + $$ = $1; + if (!($$.args = hlsl_realloc(ctx, $$.args, ($$.args_count + 1) * sizeof(*$$.args)))) + YYABORT; + $$.args[$$.args_count++] = node_from_list($3); + list_move_tail($$.instrs, $3); + vkd3d_free($3); + } + +boolean: + KW_TRUE + { + $$ = TRUE; + } + | KW_FALSE + { + $$ = FALSE; + } + +statement_list: + statement + | statement_list statement + { + $$ = $1; + list_move_tail($$, $2); + vkd3d_free($2); + } + +statement: + declaration_statement + | expr_statement + | compound_statement + | jump_statement + | selection_statement + | loop_statement + +jump_statement: + KW_RETURN expr ';' + { + if (!add_return(ctx, $2, node_from_list($2), @1)) + YYABORT; + $$ = $2; + } + | KW_RETURN ';' + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + if (!add_return(ctx, $$, NULL, @1)) + YYABORT; + } + +selection_statement: + KW_IF '(' expr ')' if_body + { + struct hlsl_ir_node *condition = node_from_list($3); + struct hlsl_ir_if *instr; + + if (!(instr = hlsl_new_if(ctx, condition, @1))) + YYABORT; + list_move_tail(&instr->then_instrs.instrs, $5.then_instrs); + if ($5.else_instrs) + list_move_tail(&instr->else_instrs.instrs, $5.else_instrs); + vkd3d_free($5.then_instrs); + vkd3d_free($5.else_instrs); + if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) + { + struct vkd3d_string_buffer *string; + + if ((string = 
hlsl_type_to_string(ctx, condition->data_type))) + hlsl_error(ctx, &instr->node.loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "if condition type %s is not scalar.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + $$ = $3; + list_add_tail($$, &instr->node.entry); + } + +if_body: + statement + { + $$.then_instrs = $1; + $$.else_instrs = NULL; + } + | statement KW_ELSE statement + { + $$.then_instrs = $1; + $$.else_instrs = $3; + } + +loop_statement: + KW_WHILE '(' expr ')' statement + { + $$ = create_loop(ctx, LOOP_WHILE, NULL, $3, NULL, $5, @1); + } + | KW_DO statement KW_WHILE '(' expr ')' ';' + { + $$ = create_loop(ctx, LOOP_DO_WHILE, NULL, $5, NULL, $2, @1); + } + | KW_FOR '(' scope_start expr_statement expr_statement expr ')' statement + { + $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); + hlsl_pop_scope(ctx); + } + | KW_FOR '(' scope_start declaration expr_statement expr ')' statement + { + $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); + hlsl_pop_scope(ctx); + } + +expr_statement: + ';' + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + } + | expr ';' + { + $$ = $1; + } + +func_arguments: + %empty + { + $$.args = NULL; + $$.args_count = 0; + if (!($$.instrs = make_empty_list(ctx))) + YYABORT; + } + | initializer_expr_list + +primary_expr: + C_FLOAT + { + struct hlsl_ir_constant *c; + + if (!(c = hlsl_alloc(ctx, sizeof(*c)))) + YYABORT; + init_node(&c->node, HLSL_IR_CONSTANT, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), @1); + c->value[0].f = $1; + if (!($$ = make_list(ctx, &c->node))) + YYABORT; + } + | C_INTEGER + { + struct hlsl_ir_constant *c; + + if (!(c = hlsl_alloc(ctx, sizeof(*c)))) + YYABORT; + init_node(&c->node, HLSL_IR_CONSTANT, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), @1); + c->value[0].i = $1; + if (!($$ = make_list(ctx, &c->node))) + YYABORT; + } + | boolean + { + struct hlsl_ir_constant *c; + + if (!(c = hlsl_alloc(ctx, sizeof(*c)))) + YYABORT; + init_node(&c->node, HLSL_IR_CONSTANT, hlsl_get_scalar_type(ctx, 
HLSL_TYPE_BOOL), @1); + c->value[0].b = $1; + if (!($$ = make_list(ctx, &c->node))) + YYABORT; + } + | VAR_IDENTIFIER + { + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + + if (!(var = hlsl_get_var(ctx->cur_scope, $1))) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1); + YYABORT; + } + if (!(load = hlsl_new_var_load(ctx, var, @1))) + YYABORT; + if (!($$ = make_list(ctx, &load->node))) + YYABORT; + } + | '(' expr ')' + { + $$ = $2; + } + | var_identifier '(' func_arguments ')' + { + if (!($$ = add_call(ctx, $1, &$3, @1))) + YYABORT; + } + | NEW_IDENTIFIER + { + if (ctx->in_state_block) + { + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + + if (!(var = hlsl_new_synthetic_var(ctx, "", + hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), @1))) + YYABORT; + if (!(load = hlsl_new_var_load(ctx, var, @1))) + YYABORT; + if (!($$ = make_list(ctx, &load->node))) + YYABORT; + } + else + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Identifier \"%s\" is not declared.\n", $1); + YYABORT; + } + } + +postfix_expr: + primary_expr + | postfix_expr OP_INC + { + if (!add_increment(ctx, $1, false, true, @2)) + { + destroy_instr_list($1); + YYABORT; + } + $$ = $1; + } + | postfix_expr OP_DEC + { + if (!add_increment(ctx, $1, true, true, @2)) + { + destroy_instr_list($1); + YYABORT; + } + $$ = $1; + } + | postfix_expr '.' 
any_identifier + { + struct hlsl_ir_node *node = node_from_list($1); + + if (node->data_type->type == HLSL_CLASS_STRUCT) + { + struct hlsl_type *type = node->data_type; + struct hlsl_struct_field *field; + + if (!(field = get_struct_field(type->e.elements, $3))) + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3); + YYABORT; + } + + if (!add_record_load(ctx, $1, node, field, @2)) + YYABORT; + $$ = $1; + } + else if (node->data_type->type <= HLSL_CLASS_LAST_NUMERIC) + { + struct hlsl_ir_swizzle *swizzle; + + if (!(swizzle = get_swizzle(ctx, node, $3, &@3))) + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); + YYABORT; + } + list_add_tail($1, &swizzle->node.entry); + $$ = $1; + } + else + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid subscript \"%s\".", $3); + YYABORT; + } + } + | postfix_expr '[' expr ']' + { + struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); + struct hlsl_ir_expr *cast; + + list_move_tail($1, $3); + vkd3d_free($3); + + if (index->data_type->type != HLSL_CLASS_SCALAR) + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Array index is not scalar."); + destroy_instr_list($1); + YYABORT; + } + + if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) + { + destroy_instr_list($1); + YYABORT; + } + list_add_tail($1, &cast->node.entry); + + if (!add_array_load(ctx, $1, array, &cast->node, @2)) + { + destroy_instr_list($1); + YYABORT; + } + $$ = $1; + } + + /* var_modifiers is necessary to avoid shift/reduce conflicts. 
*/ + | var_modifiers type '(' initializer_expr_list ')' + { + if ($1) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on constructors."); + free_parse_initializer(&$4); + YYABORT; + } + if ($2->type > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, $2))) + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Constructor data type %s is not numeric.", string->buffer); + hlsl_release_string_buffer(ctx, string); + free_parse_initializer(&$4); + YYABORT; + } + if ($2->dimx * $2->dimy != initializer_size(&$4)) + { + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in constructor, but got %u.", + $2->dimx * $2->dimy, initializer_size(&$4)); + free_parse_initializer(&$4); + YYABORT; + } + + if (!($$ = add_constructor(ctx, $2, &$4, @2))) + { + free_parse_initializer(&$4); + YYABORT; + } + } + | postfix_expr '.' any_identifier '(' func_arguments ')' + { + struct hlsl_ir_node *object = node_from_list($1); + + list_move_tail($1, $5.instrs); + vkd3d_free($5.instrs); + + if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) + { + hlsl_free_instr_list($1); + vkd3d_free($5.args); + YYABORT; + } + vkd3d_free($5.args); + $$ = $1; + } + +unary_expr: + postfix_expr + | OP_INC unary_expr + { + if (!add_increment(ctx, $2, false, false, @1)) + { + destroy_instr_list($2); + YYABORT; + } + $$ = $2; + } + | OP_DEC unary_expr + { + if (!add_increment(ctx, $2, true, false, @1)) + { + destroy_instr_list($2); + YYABORT; + } + $$ = $2; + } + | '+' unary_expr + { + $$ = $2; + } + | '-' unary_expr + { + add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_list($2), &@1); + $$ = $2; + } + | '~' unary_expr + { + add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_list($2), &@1); + $$ = $2; + } + | '!' 
unary_expr + { + add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_list($2), &@1); + $$ = $2; + } + /* var_modifiers is necessary to avoid shift/reduce conflicts. */ + | '(' var_modifiers type arrays ')' unary_expr + { + struct hlsl_type *src_type = node_from_list($6)->data_type; + struct hlsl_type *dst_type; + struct hlsl_ir_expr *cast; + unsigned int i; + + if ($2) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on casts."); + YYABORT; + } + + dst_type = $3; + for (i = 0; i < $4.count; ++i) + dst_type = hlsl_new_array_type(ctx, dst_type, $4.sizes[i]); + + if (!compatible_data_types(src_type, dst_type)) + { + struct vkd3d_string_buffer *src_string, *dst_string; + + src_string = hlsl_type_to_string(ctx, src_type); + dst_string = hlsl_type_to_string(ctx, dst_type); + if (src_string && dst_string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.", + src_string->buffer, dst_string->buffer); + hlsl_release_string_buffer(ctx, src_string); + hlsl_release_string_buffer(ctx, dst_string); + YYABORT; + } + + if (!(cast = hlsl_new_cast(ctx, node_from_list($6), dst_type, &@3))) + { + hlsl_free_instr_list($6); + YYABORT; + } + list_add_tail($6, &cast->node.entry); + $$ = $6; + } + +mul_expr: + unary_expr + | mul_expr '*' unary_expr + { + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, @2); + } + | mul_expr '/' unary_expr + { + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, @2); + } + | mul_expr '%' unary_expr + { + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, @2); + } + +add_expr: + mul_expr + | add_expr '+' mul_expr + { + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); + } + | add_expr '-' mul_expr + { + struct hlsl_ir_node *neg; + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), @2))) + YYABORT; + list_add_tail($3, &neg->entry); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, 
$3, HLSL_OP2_ADD, @2); + } + +shift_expr: + add_expr + | shift_expr OP_LEFTSHIFT add_expr + { + $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); + } + | shift_expr OP_RIGHTSHIFT add_expr + { + $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); + } + +relational_expr: + shift_expr + | relational_expr '<' shift_expr + { + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, @2); + } + | relational_expr '>' shift_expr + { + $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, @2); + } + | relational_expr OP_LE shift_expr + { + $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, @2); + } + | relational_expr OP_GE shift_expr + { + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, @2); + } + +equality_expr: + relational_expr + | equality_expr OP_EQ relational_expr + { + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, @2); + } + | equality_expr OP_NE relational_expr + { + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, @2); + } + +bitand_expr: + equality_expr + | bitand_expr '&' equality_expr + { + $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); + } + +bitxor_expr: + bitand_expr + | bitxor_expr '^' bitand_expr + { + $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); + } + +bitor_expr: + bitxor_expr + | bitor_expr '|' bitxor_expr + { + $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); + } + +logicand_expr: + bitor_expr + | logicand_expr OP_AND bitor_expr + { + $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); + } + +logicor_expr: + logicand_expr + | logicor_expr OP_OR logicand_expr + { + $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); + } + +conditional_expr: + logicor_expr + | logicor_expr '?' 
expr ':' assignment_expr + { + hlsl_fixme(ctx, &@$, "Ternary operator."); + } + +assignment_expr: + + conditional_expr + | unary_expr assign_op assignment_expr + { + struct hlsl_ir_node *lhs = node_from_list($1), *rhs = node_from_list($3); + + if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); + YYABORT; + } + list_move_tail($3, $1); + vkd3d_free($1); + if (!add_assignment(ctx, $3, lhs, $2, rhs)) + YYABORT; + $$ = $3; + } + +assign_op: + '=' + { + $$ = ASSIGN_OP_ASSIGN; + } + | OP_ADDASSIGN + { + $$ = ASSIGN_OP_ADD; + } + | OP_SUBASSIGN + { + $$ = ASSIGN_OP_SUB; + } + | OP_MULASSIGN + { + $$ = ASSIGN_OP_MUL; + } + | OP_DIVASSIGN + { + $$ = ASSIGN_OP_DIV; + } + | OP_MODASSIGN + { + $$ = ASSIGN_OP_MOD; + } + | OP_LEFTSHIFTASSIGN + { + $$ = ASSIGN_OP_LSHIFT; + } + | OP_RIGHTSHIFTASSIGN + { + $$ = ASSIGN_OP_RSHIFT; + } + | OP_ANDASSIGN + { + $$ = ASSIGN_OP_AND; + } + | OP_ORASSIGN + { + $$ = ASSIGN_OP_OR; + } + | OP_XORASSIGN + { + $$ = ASSIGN_OP_XOR; + } + +expr: + assignment_expr + | expr ',' assignment_expr + { + $$ = $1; + list_move_tail($$, $3); + vkd3d_free($3); + } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c new file mode 100644 index 00000000000..62dbce5e466 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -0,0 +1,1713 @@ +/* + * HLSL optimization and code generation + * + * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "hlsl.h" +#include + +/* Split uniforms into two variables representing the constant and temp + * registers, and copy the former to the latter, so that writes to uniforms + * work. */ +static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) +{ + struct vkd3d_string_buffer *name; + struct hlsl_ir_var *uniform; + struct hlsl_ir_store *store; + struct hlsl_ir_load *load; + + /* Use the synthetic name for the temp, rather than the uniform, so that we + * can write the uniform name into the shader reflection data. 
*/ + + if (!(uniform = hlsl_new_var(ctx, temp->name, temp->data_type, temp->loc, NULL, 0, &temp->reg_reservation))) + return; + list_add_before(&temp->scope_entry, &uniform->scope_entry); + list_add_tail(&ctx->extern_vars, &uniform->extern_entry); + uniform->is_uniform = 1; + uniform->is_param = temp->is_param; + uniform->buffer = temp->buffer; + + if (!(name = hlsl_get_string_buffer(ctx))) + return; + vkd3d_string_buffer_printf(name, "", temp->name); + temp->name = hlsl_strdup(ctx, name->buffer); + hlsl_release_string_buffer(ctx, name); + + if (!(load = hlsl_new_var_load(ctx, uniform, temp->loc))) + return; + list_add_head(instrs, &load->node.entry); + + if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) + return; + list_add_after(&load->node.entry, &store->node.entry); +} + +static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var, + struct hlsl_type *type, unsigned int field_offset, const struct hlsl_semantic *semantic) +{ + struct vkd3d_string_buffer *name; + struct hlsl_semantic new_semantic; + struct hlsl_ir_constant *offset; + struct hlsl_ir_store *store; + struct hlsl_ir_load *load; + struct hlsl_ir_var *input; + + if (!(name = hlsl_get_string_buffer(ctx))) + return; + vkd3d_string_buffer_printf(name, "", semantic->name, semantic->index); + if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) + { + hlsl_release_string_buffer(ctx, name); + return; + } + new_semantic.index = semantic->index; + if (!(input = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, var->loc, &new_semantic, 0, NULL))) + { + hlsl_release_string_buffer(ctx, name); + vkd3d_free((void *)new_semantic.name); + return; + } + hlsl_release_string_buffer(ctx, name); + input->is_input_semantic = 1; + input->is_param = var->is_param; + list_add_before(&var->scope_entry, &input->scope_entry); + list_add_tail(&ctx->extern_vars, &input->extern_entry); + + if (!(load = hlsl_new_var_load(ctx, input, var->loc))) + return; + 
list_add_head(instrs, &load->node.entry); + + if (!(offset = hlsl_new_uint_constant(ctx, field_offset, var->loc))) + return; + list_add_after(&load->node.entry, &offset->node.entry); + + if (!(store = hlsl_new_store(ctx, var, &offset->node, &load->node, 0, var->loc))) + return; + list_add_after(&offset->node.entry, &store->node.entry); +} + +static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var, + struct hlsl_type *type, unsigned int field_offset) +{ + struct hlsl_struct_field *field; + + LIST_FOR_EACH_ENTRY(field, type->e.elements, struct hlsl_struct_field, entry) + { + if (field->type->type == HLSL_CLASS_STRUCT) + prepend_input_struct_copy(ctx, instrs, var, field->type, field_offset + field->reg_offset); + else if (field->semantic.name) + prepend_input_copy(ctx, instrs, var, field->type, field_offset + field->reg_offset, &field->semantic); + else + hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Field '%s' is missing a semantic.", field->name); + } +} + +/* Split inputs into two variables representing the semantic and temp registers, + * and copy the former to the latter, so that writes to input variables work. 
*/ +static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) +{ + if (var->data_type->type == HLSL_CLASS_STRUCT) + prepend_input_struct_copy(ctx, instrs, var, var->data_type, 0); + else if (var->semantic.name) + prepend_input_copy(ctx, instrs, var, var->data_type, 0, &var->semantic); +} + +static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var, + struct hlsl_type *type, unsigned int field_offset, const struct hlsl_semantic *semantic) +{ + struct vkd3d_string_buffer *name; + struct hlsl_semantic new_semantic; + struct hlsl_ir_constant *offset; + struct hlsl_ir_store *store; + struct hlsl_ir_var *output; + struct hlsl_ir_load *load; + + if (!(name = hlsl_get_string_buffer(ctx))) + return; + vkd3d_string_buffer_printf(name, "", semantic->name, semantic->index); + if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) + { + hlsl_release_string_buffer(ctx, name); + return; + } + new_semantic.index = semantic->index; + if (!(output = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, var->loc, &new_semantic, 0, NULL))) + { + vkd3d_free((void *)new_semantic.name); + hlsl_release_string_buffer(ctx, name); + return; + } + hlsl_release_string_buffer(ctx, name); + output->is_output_semantic = 1; + output->is_param = var->is_param; + list_add_before(&var->scope_entry, &output->scope_entry); + list_add_tail(&ctx->extern_vars, &output->extern_entry); + + if (!(offset = hlsl_new_uint_constant(ctx, field_offset, var->loc))) + return; + list_add_tail(instrs, &offset->node.entry); + + if (!(load = hlsl_new_load(ctx, var, &offset->node, type, var->loc))) + return; + list_add_after(&offset->node.entry, &load->node.entry); + + if (!(store = hlsl_new_store(ctx, output, NULL, &load->node, 0, var->loc))) + return; + list_add_after(&load->node.entry, &store->node.entry); +} + +static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var, + struct 
hlsl_type *type, unsigned int field_offset) +{ + struct hlsl_struct_field *field; + + LIST_FOR_EACH_ENTRY(field, type->e.elements, struct hlsl_struct_field, entry) + { + if (field->type->type == HLSL_CLASS_STRUCT) + append_output_struct_copy(ctx, instrs, var, field->type, field_offset + field->reg_offset); + else if (field->semantic.name) + append_output_copy(ctx, instrs, var, field->type, field_offset + field->reg_offset, &field->semantic); + else + hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Field '%s' is missing a semantic.", field->name); + } +} + +/* Split outputs into two variables representing the temp and semantic + * registers, and copy the former to the latter, so that reads from output + * variables work. */ +static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) +{ + if (var->data_type->type == HLSL_CLASS_STRUCT) + append_output_struct_copy(ctx, instrs, var, var->data_type, 0); + else if (var->semantic.name) + append_output_copy(ctx, instrs, var, var->data_type, 0, &var->semantic); +} + +static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), + struct hlsl_block *block, void *context) +{ + struct hlsl_ir_node *instr, *next; + bool progress = false; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + progress |= transform_ir(ctx, func, &iff->then_instrs, context); + progress |= transform_ir(ctx, func, &iff->else_instrs, context); + } + else if (instr->type == HLSL_IR_LOOP) + progress |= transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context); + + progress |= func(ctx, instr, context); + } + + return progress; +} + +/* Lower casts from vec1 to vecN to swizzles. 
*/ +static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + const struct hlsl_type *src_type, *dst_type; + struct hlsl_type *dst_scalar_type; + struct hlsl_ir_expr *cast; + + if (instr->type != HLSL_IR_EXPR) + return false; + cast = hlsl_ir_expr(instr); + src_type = cast->operands[0].node->data_type; + dst_type = cast->node.data_type; + + if (cast->op == HLSL_OP1_CAST + && src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR + && src_type->dimx == 1) + { + struct hlsl_ir_swizzle *swizzle; + struct hlsl_ir_expr *new_cast; + + dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); + /* We need to preserve the cast since it might be doing more than just + * turning the scalar into a vector. */ + if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) + return false; + list_add_after(&cast->node.entry, &new_cast->node.entry); + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, &new_cast->node, &cast->node.loc))) + return false; + list_add_after(&new_cast->node.entry, &swizzle->node.entry); + + hlsl_replace_node(&cast->node, &swizzle->node); + return true; + } + + return false; +} + +struct copy_propagation_value +{ + struct hlsl_ir_node *node; + unsigned int component; +}; + +struct copy_propagation_var_def +{ + struct rb_entry entry; + struct hlsl_ir_var *var; + struct copy_propagation_value values[]; +}; + +struct copy_propagation_state +{ + struct rb_tree var_defs; +}; + +static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry) +{ + struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); + uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)var_def->var; + + return (key_int > entry_int) - (key_int < entry_int); +} + +static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *context) +{ + struct 
copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); + + vkd3d_free(var_def); +} + +static struct copy_propagation_var_def *copy_propagation_get_var_def(const struct copy_propagation_state *state, + const struct hlsl_ir_var *var) +{ + struct rb_entry *entry = rb_get(&state->var_defs, var); + + if (entry) + return RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); + else + return NULL; +} + +static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx, + struct copy_propagation_state *state, struct hlsl_ir_var *var) +{ + struct rb_entry *entry = rb_get(&state->var_defs, var); + struct copy_propagation_var_def *var_def; + int res; + + if (entry) + return RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); + + if (!(var_def = hlsl_alloc(ctx, offsetof(struct copy_propagation_var_def, values[var->data_type->reg_size])))) + return NULL; + + var_def->var = var; + + res = rb_put(&state->var_defs, var, &var_def->entry); + assert(!res); + + return var_def; +} + +static void copy_propagation_invalidate_whole_variable(struct copy_propagation_var_def *var_def) +{ + TRACE("Invalidate variable %s.\n", var_def->var->name); + memset(var_def->values, 0, sizeof(*var_def->values) * var_def->var->data_type->reg_size); +} + +static void copy_propagation_set_value(struct copy_propagation_var_def *var_def, unsigned int offset, + unsigned char writemask, struct hlsl_ir_node *node) +{ + unsigned int i, j = 0; + + for (i = 0; i < 4; ++i) + { + if (writemask & (1u << i)) + { + TRACE("Variable %s[%u] is written by instruction %p%s.\n", + var_def->var->name, offset + i, node, debug_hlsl_writemask(1u << i)); + var_def->values[offset + i].node = node; + var_def->values[offset + i].component = j++; + } + } +} + +static struct hlsl_ir_node *copy_propagation_compute_replacement(struct hlsl_ctx *ctx, + const struct copy_propagation_state *state, const struct hlsl_deref *deref, + unsigned int count, 
unsigned int *swizzle) +{ + const struct hlsl_ir_var *var = deref->var; + struct copy_propagation_var_def *var_def; + struct hlsl_ir_node *node = NULL; + unsigned int offset, i; + + if (!hlsl_offset_from_deref(ctx, deref, &offset)) + return NULL; + + if (!(var_def = copy_propagation_get_var_def(state, var))) + return NULL; + + assert(offset + count <= var_def->var->data_type->reg_size); + + *swizzle = 0; + + for (i = 0; i < count; ++i) + { + if (!node) + { + node = var_def->values[offset + i].node; + } + else if (node != var_def->values[offset + i].node) + { + TRACE("No single source for propagating load from %s[%u-%u].\n", var->name, offset, offset + count); + return NULL; + } + *swizzle |= var_def->values[offset + i].component << i * 2; + } + + TRACE("Load from %s[%u-%u] propagated as instruction %p%s.\n", + var->name, offset, offset + count, node, debug_hlsl_swizzle(*swizzle, count)); + return node; +} + +static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + struct hlsl_ir_load *load, struct copy_propagation_state *state) +{ + struct hlsl_ir_node *node = &load->node, *new_node; + struct hlsl_type *type = node->data_type; + struct hlsl_ir_swizzle *swizzle_node; + unsigned int dimx = 0; + unsigned int swizzle; + + switch (type->type) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + dimx = type->dimx; + break; + + case HLSL_CLASS_OBJECT: + dimx = 1; + break; + + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_ARRAY: + case HLSL_CLASS_STRUCT: + /* FIXME: Actually we shouldn't even get here, but we don't split + * matrices yet. 
*/ + return false; + } + + if (!(new_node = copy_propagation_compute_replacement(ctx, state, &load->src, dimx, &swizzle))) + return false; + + if (type->type != HLSL_CLASS_OBJECT) + { + if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, dimx, new_node, &node->loc))) + return false; + list_add_before(&node->entry, &swizzle_node->node.entry); + new_node = &swizzle_node->node; + } + hlsl_replace_node(node, new_node); + return true; +} + +static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, + struct hlsl_deref *deref, struct copy_propagation_state *state) +{ + struct hlsl_ir_load *load; + struct hlsl_ir_node *node; + unsigned int swizzle; + + if (!(node = copy_propagation_compute_replacement(ctx, state, deref, 1, &swizzle))) + return false; + + /* Only HLSL_IR_LOAD can produce an object. */ + load = hlsl_ir_load(node); + deref->var = load->src.var; + hlsl_src_remove(&deref->offset); + hlsl_src_from_node(&deref->offset, load->src.offset.node); + return true; +} + +static bool copy_propagation_transform_resource_load(struct hlsl_ctx *ctx, + struct hlsl_ir_resource_load *load, struct copy_propagation_state *state) +{ + bool progress = false; + + progress |= copy_propagation_transform_object_load(ctx, &load->resource, state); + if (load->sampler.var) + progress |= copy_propagation_transform_object_load(ctx, &load->sampler, state); + return progress; +} + +static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + struct copy_propagation_state *state) +{ + struct copy_propagation_var_def *var_def; + struct hlsl_deref *lhs = &store->lhs; + struct hlsl_ir_var *var = lhs->var; + unsigned int offset; + + if (!(var_def = copy_propagation_create_var_def(ctx, state, var))) + return; + + if (hlsl_offset_from_deref(ctx, lhs, &offset)) + { + unsigned int writemask = store->writemask; + + if (store->rhs.node->data_type->type == HLSL_CLASS_OBJECT) + writemask = VKD3DSP_WRITEMASK_0; + copy_propagation_set_value(var_def, offset, 
writemask, store->rhs.node); + } + else + { + copy_propagation_invalidate_whole_variable(var_def); + } +} + +static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state) +{ + struct hlsl_ir_node *instr, *next; + bool progress = false; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_LOAD: + progress |= copy_propagation_transform_load(ctx, hlsl_ir_load(instr), state); + break; + + case HLSL_IR_RESOURCE_LOAD: + progress |= copy_propagation_transform_resource_load(ctx, hlsl_ir_resource_load(instr), state); + break; + + case HLSL_IR_STORE: + copy_propagation_record_store(ctx, hlsl_ir_store(instr), state); + break; + + case HLSL_IR_IF: + FIXME("Copy propagation doesn't support conditionals yet, leaving.\n"); + return progress; + + case HLSL_IR_LOOP: + FIXME("Copy propagation doesn't support loops yet, leaving.\n"); + return progress; + + default: + break; + } + } + + return progress; +} + +static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + struct copy_propagation_state state; + bool progress; + + rb_init(&state.var_defs, copy_propagation_var_def_compare); + + progress = copy_propagation_transform_block(ctx, block, &state); + + rb_destroy(&state.var_defs, copy_propagation_var_def_destroy, NULL); + + return progress; +} + +static bool is_vec1(const struct hlsl_type *type) +{ + return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); +} + +static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + if (instr->type == HLSL_IR_EXPR) + { + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + const struct hlsl_type *src_type = expr->operands[0].node->data_type; + const struct hlsl_type *dst_type = expr->node.data_type; + + if (expr->op != HLSL_OP1_CAST) + return false; + + if 
(hlsl_types_are_equal(src_type, dst_type) + || (src_type->base_type == dst_type->base_type && is_vec1(src_type) && is_vec1(dst_type))) + { + hlsl_replace_node(&expr->node, expr->operands[0].node); + return true; + } + } + + return false; +} + +/* Helper for split_array_copies() and split_struct_copies(). Inserts new + * instructions right before "store". */ +static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + const struct hlsl_ir_load *load, const unsigned int offset, struct hlsl_type *type) +{ + struct hlsl_ir_node *offset_instr, *add; + struct hlsl_ir_store *split_store; + struct hlsl_ir_load *split_load; + struct hlsl_ir_constant *c; + + if (!(c = hlsl_new_uint_constant(ctx, offset, store->node.loc))) + return false; + list_add_before(&store->node.entry, &c->node.entry); + + offset_instr = &c->node; + if (load->src.offset.node) + { + if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, load->src.offset.node, &c->node))) + return false; + list_add_before(&store->node.entry, &add->entry); + offset_instr = add; + } + if (!(split_load = hlsl_new_load(ctx, load->src.var, offset_instr, type, store->node.loc))) + return false; + list_add_before(&store->node.entry, &split_load->node.entry); + + offset_instr = &c->node; + if (store->lhs.offset.node) + { + if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, store->lhs.offset.node, &c->node))) + return false; + list_add_before(&store->node.entry, &add->entry); + offset_instr = add; + } + + if (!(split_store = hlsl_new_store(ctx, store->lhs.var, offset_instr, &split_load->node, 0, store->node.loc))) + return false; + list_add_before(&store->node.entry, &split_store->node.entry); + + return true; +} + +static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + const struct hlsl_ir_node *rhs; + struct hlsl_type *element_type; + const struct hlsl_type *type; + unsigned int element_size, i; + struct hlsl_ir_store *store; + + if (instr->type != HLSL_IR_STORE) + 
return false; + + store = hlsl_ir_store(instr); + rhs = store->rhs.node; + type = rhs->data_type; + if (type->type != HLSL_CLASS_ARRAY) + return false; + element_type = type->e.array.type; + element_size = element_type->reg_size; + + for (i = 0; i < type->e.array.elements_count; ++i) + { + if (!split_copy(ctx, store, hlsl_ir_load(rhs), i * element_size, element_type)) + return false; + } + + /* Remove the store instruction, so that we can split structs which contain + * other structs. Although assignments produce a value, we don't allow + * HLSL_IR_STORE to be used as a source. */ + list_remove(&store->node.entry); + hlsl_free_instr(&store->node); + return true; +} + +static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + const struct hlsl_struct_field *field; + const struct hlsl_ir_node *rhs; + const struct hlsl_type *type; + struct hlsl_ir_store *store; + + if (instr->type != HLSL_IR_STORE) + return false; + + store = hlsl_ir_store(instr); + rhs = store->rhs.node; + type = rhs->data_type; + if (type->type != HLSL_CLASS_STRUCT) + return false; + + LIST_FOR_EACH_ENTRY(field, type->e.elements, struct hlsl_struct_field, entry) + { + if (!split_copy(ctx, store, hlsl_ir_load(rhs), field->reg_offset, field->type)) + return false; + } + + /* Remove the store instruction, so that we can split structs which contain + * other structs. Although assignments produce a value, we don't allow + * HLSL_IR_STORE to be used as a source. 
*/ + list_remove(&store->node.entry); + hlsl_free_instr(&store->node); + return true; +} + +static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + const struct hlsl_type *src_type, *dst_type; + struct hlsl_type *dst_vector_type; + struct hlsl_ir_expr *cast; + + if (instr->type != HLSL_IR_EXPR) + return false; + cast = hlsl_ir_expr(instr); + src_type = cast->operands[0].node->data_type; + dst_type = cast->node.data_type; + + if (cast->op == HLSL_OP1_CAST + && src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR + && dst_type->dimx < src_type->dimx) + { + struct hlsl_ir_swizzle *swizzle; + struct hlsl_ir_expr *new_cast; + + dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); + /* We need to preserve the cast since it might be doing more than just + * narrowing the vector. */ + if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) + return false; + list_add_after(&cast->node.entry, &new_cast->node.entry); + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, &new_cast->node, &cast->node.loc))) + return false; + list_add_after(&new_cast->node.entry, &swizzle->node.entry); + + hlsl_replace_node(&cast->node, &swizzle->node); + return true; + } + + return false; +} + +static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_swizzle *swizzle; + unsigned int i; + + if (instr->type != HLSL_IR_SWIZZLE) + return false; + swizzle = hlsl_ir_swizzle(instr); + + if (instr->data_type->dimx != swizzle->val.node->data_type->dimx) + return false; + + for (i = 0; i < instr->data_type->dimx; ++i) + if (((swizzle->swizzle >> (2 * i)) & 3) != i) + return false; + + hlsl_replace_node(instr, swizzle->val.node); + + return true; +} + +/* Lower DIV to RCP + MUL. 
*/ +static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_expr *expr; + struct hlsl_ir_node *rcp; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP2_DIV) + return false; + + if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, instr->loc))) + return false; + list_add_before(&expr->node.entry, &rcp->entry); + expr->op = HLSL_OP2_MUL; + hlsl_src_remove(&expr->operands[1]); + hlsl_src_from_node(&expr->operands[1], rcp); + return true; +} + +static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + switch (instr->type) + { + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_LOAD: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_SWIZZLE: + if (list_empty(&instr->uses)) + { + list_remove(&instr->entry); + hlsl_free_instr(instr); + return true; + } + break; + + case HLSL_IR_STORE: + { + struct hlsl_ir_store *store = hlsl_ir_store(instr); + struct hlsl_ir_var *var = store->lhs.var; + + if (var->last_read < instr->index) + { + list_remove(&instr->entry); + hlsl_free_instr(instr); + return true; + } + break; + } + + case HLSL_IR_IF: + case HLSL_IR_JUMP: + case HLSL_IR_LOOP: + break; + } + + return false; +} + +/* Allocate a unique, ordered index to each instruction, which will be used for + * computing liveness ranges. 
*/ +static unsigned int index_instructions(struct hlsl_block *block, unsigned int index) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + instr->index = index++; + + if (instr->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + index = index_instructions(&iff->then_instrs, index); + index = index_instructions(&iff->else_instrs, index); + } + else if (instr->type == HLSL_IR_LOOP) + { + index = index_instructions(&hlsl_ir_loop(instr)->body, index); + hlsl_ir_loop(instr)->next_index = index; + } + } + + return index; +} + +static void dump_function_decl(struct rb_entry *entry, void *context) +{ + struct hlsl_ir_function_decl *func = RB_ENTRY_VALUE(entry, struct hlsl_ir_function_decl, entry); + struct hlsl_ctx *ctx = context; + + if (func->has_body) + hlsl_dump_function(ctx, func); +} + +static void dump_function(struct rb_entry *entry, void *context) +{ + struct hlsl_ir_function *func = RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry); + struct hlsl_ctx *ctx = context; + + rb_for_each_entry(&func->overloads, dump_function_decl, ctx); +} + +/* Compute the earliest and latest liveness for each variable. In the case that + * a variable is accessed inside of a loop, we promote its liveness to extend + * to at least the range of the entire loop. Note that we don't need to do this + * for anonymous nodes, since there's currently no way to use a node which was + * calculated in an earlier iteration of the loop. */ +static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last) +{ + struct hlsl_ir_node *instr; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + const unsigned int var_last_read = loop_last ? 
max(instr->index, loop_last) : instr->index; + + switch (instr->type) + { + case HLSL_IR_STORE: + { + struct hlsl_ir_store *store = hlsl_ir_store(instr); + + var = store->lhs.var; + if (!var->first_write) + var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; + store->rhs.node->last_read = instr->index; + if (store->lhs.offset.node) + store->lhs.offset.node->last_read = instr->index; + break; + } + case HLSL_IR_EXPR: + { + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) + expr->operands[i].node->last_read = instr->index; + break; + } + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + compute_liveness_recurse(&iff->then_instrs, loop_first, loop_last); + compute_liveness_recurse(&iff->else_instrs, loop_first, loop_last); + iff->condition.node->last_read = instr->index; + break; + } + case HLSL_IR_LOAD: + { + struct hlsl_ir_load *load = hlsl_ir_load(instr); + + var = load->src.var; + var->last_read = max(var->last_read, var_last_read); + if (load->src.offset.node) + load->src.offset.node->last_read = instr->index; + break; + } + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + compute_liveness_recurse(&loop->body, loop_first ? loop_first : instr->index, + loop_last ? 
loop_last : loop->next_index); + break; + } + case HLSL_IR_RESOURCE_LOAD: + { + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + + var = load->resource.var; + var->last_read = max(var->last_read, var_last_read); + if (load->resource.offset.node) + load->resource.offset.node->last_read = instr->index; + + if ((var = load->sampler.var)) + { + var->last_read = max(var->last_read, var_last_read); + if (load->sampler.offset.node) + load->sampler.offset.node->last_read = instr->index; + } + + load->coords.node->last_read = instr->index; + if (load->texel_offset.node) + load->texel_offset.node->last_read = instr->index; + break; + } + case HLSL_IR_SWIZZLE: + { + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + + swizzle->val.node->last_read = instr->index; + break; + } + case HLSL_IR_CONSTANT: + case HLSL_IR_JUMP: + break; + } + } +} + +static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + + /* Index 0 means unused; index 1 means function entry, so start at 2. */ + index_instructions(&entry_func->body, 2); + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + var->first_write = var->last_read = 0; + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform || var->is_input_semantic) + var->first_write = 1; + else if (var->is_output_semantic) + var->last_read = UINT_MAX; + } + + compute_liveness_recurse(&entry_func->body, 0, 0); +} + +struct liveness +{ + size_t size; + uint32_t reg_count; + struct + { + /* 0 if not live yet. 
*/ + unsigned int last_read; + } *regs; +}; + +static unsigned int get_available_writemask(struct liveness *liveness, + unsigned int first_write, unsigned int component_idx, unsigned int component_count) +{ + unsigned int i, writemask = 0, count = 0; + + for (i = 0; i < 4; ++i) + { + if (liveness->regs[component_idx + i].last_read <= first_write) + { + writemask |= 1u << i; + if (++count == component_count) + return writemask; + } + } + + return 0; +} + +static bool resize_liveness(struct hlsl_ctx *ctx, struct liveness *liveness, size_t new_count) +{ + size_t old_capacity = liveness->size; + + if (!hlsl_array_reserve(ctx, (void **)&liveness->regs, &liveness->size, new_count, sizeof(*liveness->regs))) + return false; + + if (liveness->size > old_capacity) + memset(liveness->regs + old_capacity, 0, (liveness->size - old_capacity) * sizeof(*liveness->regs)); + return true; +} + +static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness *liveness, + unsigned int first_write, unsigned int last_read, unsigned int component_count) +{ + unsigned int component_idx, writemask, i; + struct hlsl_reg ret = {0}; + + for (component_idx = 0; component_idx < liveness->size; component_idx += 4) + { + if ((writemask = get_available_writemask(liveness, first_write, component_idx, component_count))) + break; + } + if (component_idx == liveness->size) + { + if (!resize_liveness(ctx, liveness, component_idx + 4)) + return ret; + writemask = (1u << component_count) - 1; + } + for (i = 0; i < 4; ++i) + { + if (writemask & (1u << i)) + liveness->regs[component_idx + i].last_read = last_read; + } + ret.id = component_idx / 4; + ret.writemask = writemask; + ret.allocated = true; + liveness->reg_count = max(liveness->reg_count, ret.id + 1); + return ret; +} + +static bool is_range_available(struct liveness *liveness, unsigned int first_write, + unsigned int component_idx, unsigned int component_count) +{ + unsigned int i; + + for (i = 0; i < component_count; i += 4) + { + 
if (!get_available_writemask(liveness, first_write, component_idx + i, 4)) + return false; + } + return true; +} + +static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liveness, + unsigned int first_write, unsigned int last_read, unsigned int component_count) +{ + unsigned int i, component_idx; + struct hlsl_reg ret = {0}; + + for (component_idx = 0; component_idx < liveness->size; component_idx += 4) + { + if (is_range_available(liveness, first_write, component_idx, + min(component_count, liveness->size - component_idx))) + break; + } + if (!resize_liveness(ctx, liveness, component_idx + component_count)) + return ret; + + for (i = 0; i < component_count; ++i) + liveness->regs[component_idx + i].last_read = last_read; + ret.id = component_idx / 4; + ret.allocated = true; + liveness->reg_count = max(liveness->reg_count, ret.id + align(component_count, 4)); + return ret; +} + +static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) +{ + static const char writemask_offset[] = {'w','x','y','z'}; + + if (type->reg_size > 4) + { + if (type->reg_size & 3) + return vkd3d_dbg_sprintf("%c%u-%c%u.%c", class, reg.id, class, + reg.id + (type->reg_size / 4), writemask_offset[type->reg_size & 3]); + + return vkd3d_dbg_sprintf("%c%u-%c%u", class, reg.id, class, + reg.id + (type->reg_size / 4) - 1); + } + return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); +} + +static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) +{ + if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) + return; + + if (!var->reg.allocated && var->last_read) + { + if (var->data_type->reg_size > 4) + var->reg = allocate_range(ctx, liveness, var->first_write, + var->last_read, var->data_type->reg_size); + else + var->reg = allocate_register(ctx, liveness, var->first_write, + var->last_read, var->data_type->dimx); + 
TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, + debug_register('r', var->reg, var->data_type), var->first_write, var->last_read); + } +} + +static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (!instr->reg.allocated && instr->last_read) + { + if (instr->data_type->reg_size > 4) + instr->reg = allocate_range(ctx, liveness, instr->index, + instr->last_read, instr->data_type->reg_size); + else + instr->reg = allocate_register(ctx, liveness, instr->index, + instr->last_read, instr->data_type->dimx); + TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, + debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); + } + + switch (instr->type) + { + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + allocate_temp_registers_recurse(ctx, &iff->then_instrs, liveness); + allocate_temp_registers_recurse(ctx, &iff->else_instrs, liveness); + break; + } + + case HLSL_IR_LOAD: + { + struct hlsl_ir_load *load = hlsl_ir_load(instr); + /* We need to at least allocate a variable for undefs. + * FIXME: We should probably find a way to remove them instead. 
*/ + allocate_variable_temp_register(ctx, load->src.var, liveness); + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + allocate_temp_registers_recurse(ctx, &loop->body, liveness); + break; + } + + case HLSL_IR_STORE: + { + struct hlsl_ir_store *store = hlsl_ir_store(instr); + allocate_variable_temp_register(ctx, store->lhs.var, liveness); + break; + } + + default: + break; + } + } +} + +static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) +{ + struct hlsl_constant_defs *defs = &ctx->constant_defs; + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_CONSTANT: + { + struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + const struct hlsl_type *type = instr->data_type; + unsigned int x, y, i, writemask, end_reg; + unsigned int reg_size = type->reg_size; + + if (reg_size > 4) + constant->reg = allocate_range(ctx, liveness, 1, UINT_MAX, reg_size); + else + constant->reg = allocate_register(ctx, liveness, 1, UINT_MAX, type->dimx); + TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); + + if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, + constant->reg.id + reg_size / 4, sizeof(*defs->values))) + return; + end_reg = constant->reg.id + reg_size / 4; + if (end_reg > defs->count) + { + memset(&defs->values[defs->count], 0, sizeof(*defs->values) * (end_reg - defs->count)); + defs->count = end_reg; + } + + assert(type->type <= HLSL_CLASS_LAST_NUMERIC); + + if (!(writemask = constant->reg.writemask)) + writemask = (1u << type->dimx) - 1; + + for (y = 0; y < type->dimy; ++y) + { + for (x = 0, i = 0; x < 4; ++x) + { + const union hlsl_constant_value *value; + float f; + + if (!(writemask & (1u << x))) + continue; + value = &constant->value[i++]; + + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + f = 
value->b; + break; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + f = value->f; + break; + + case HLSL_TYPE_INT: + f = value->i; + break; + + case HLSL_TYPE_UINT: + f = value->u; + break; + + case HLSL_TYPE_DOUBLE: + FIXME("Double constant.\n"); + return; + + default: + assert(0); + return; + } + defs->values[constant->reg.id + y].f[x] = f; + } + } + + break; + } + + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + allocate_const_registers_recurse(ctx, &iff->then_instrs, liveness); + allocate_const_registers_recurse(ctx, &iff->else_instrs, liveness); + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + allocate_const_registers_recurse(ctx, &loop->body, liveness); + break; + } + + default: + break; + } + } +} + +static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + struct liveness liveness = {0}; + struct hlsl_ir_var *var; + + allocate_const_registers_recurse(ctx, &entry_func->body, &liveness); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->last_read) + { + if (var->data_type->reg_size > 4) + var->reg = allocate_range(ctx, &liveness, 1, UINT_MAX, var->data_type->reg_size); + else + { + var->reg = allocate_register(ctx, &liveness, 1, UINT_MAX, 4); + var->reg.writemask = (1u << var->data_type->dimx) - 1; + } + TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->reg, var->data_type)); + } + } +} + +/* Simple greedy temporary register allocation pass that just assigns a unique + * index to all (simultaneously live) variables or intermediate values. Agnostic + * as to how many registers are actually available for the current backend, and + * does not handle constants. 
*/ +static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + struct liveness liveness = {0}; + allocate_temp_registers_recurse(ctx, &entry_func->body, &liveness); + ctx->temp_count = liveness.reg_count; + vkd3d_free(liveness.regs); +} + +static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) +{ + static const char *shader_names[] = + { + [VKD3D_SHADER_TYPE_PIXEL] = "Pixel", + [VKD3D_SHADER_TYPE_VERTEX] = "Vertex", + [VKD3D_SHADER_TYPE_GEOMETRY] = "Geometry", + [VKD3D_SHADER_TYPE_HULL] = "Hull", + [VKD3D_SHADER_TYPE_DOMAIN] = "Domain", + [VKD3D_SHADER_TYPE_COMPUTE] = "Compute", + }; + + unsigned int type; + uint32_t reg; + bool builtin; + + assert(var->semantic.name); + + if (ctx->profile->major_version < 4) + { + D3DDECLUSAGE usage; + uint32_t usage_idx; + + if (!hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); + return; + } + + if ((!output && !var->last_read) || (output && !var->first_write)) + return; + + builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, ®); + } + else + { + D3D_NAME usage; + bool has_idx; + + if (!hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); + return; + } + if ((builtin = hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx))) + reg = has_idx ? var->semantic.index : 0; + } + + if (builtin) + { + TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[ctx->profile->type], + output ? 
"output" : "input", var->semantic.name, var->semantic.index, type, reg); + } + else + { + var->reg.allocated = true; + var->reg.id = (*counter)++; + var->reg.writemask = (1 << var->data_type->dimx) - 1; + TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', var->reg, var->data_type)); + } +} + +static void allocate_semantic_registers(struct hlsl_ctx *ctx) +{ + unsigned int input_counter = 0, output_counter = 0; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_input_semantic) + allocate_semantic_register(ctx, var, &input_counter, false); + if (var->is_output_semantic) + allocate_semantic_register(ctx, var, &output_counter, true); + } +} + +static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t index) +{ + const struct hlsl_buffer *buffer; + + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) + { + if (buffer->used_size && buffer->reservation.type == 'b' && buffer->reservation.index == index) + return buffer; + } + return NULL; +} + +static void calculate_buffer_offset(struct hlsl_ir_var *var) +{ + struct hlsl_buffer *buffer = var->buffer; + + buffer->size = hlsl_type_get_sm4_offset(var->data_type, buffer->size); + + var->buffer_offset = buffer->size; + TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); + buffer->size += var->data_type->reg_size; + if (var->last_read) + buffer->used_size = buffer->size; +} + +static void allocate_buffers(struct hlsl_ctx *ctx) +{ + struct hlsl_buffer *buffer; + struct hlsl_ir_var *var; + uint32_t index = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->data_type->type != HLSL_CLASS_OBJECT) + { + if (var->is_param) + var->buffer = ctx->params_buffer; + + calculate_buffer_offset(var); + } + } + + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if 
(!buffer->used_size) + continue; + + if (buffer->type == HLSL_BUFFER_CONSTANT) + { + if (buffer->reservation.type == 'b') + { + const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.index); + + if (reserved_buffer && reserved_buffer != buffer) + { + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "Multiple buffers bound to cb%u.", buffer->reservation.index); + hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, + "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.index); + } + + buffer->reg.id = buffer->reservation.index; + buffer->reg.allocated = true; + TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); + } + else if (!buffer->reservation.type) + { + while (get_reserved_buffer(ctx, index)) + ++index; + + buffer->reg.id = index; + buffer->reg.allocated = true; + TRACE("Allocated %s to cb%u.\n", buffer->name, index); + ++index; + } + else + { + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Constant buffers must be allocated to register type 'b'."); + } + } + else + { + FIXME("Allocate registers for texture buffers.\n"); + } + } +} + +static const struct hlsl_ir_var *get_reserved_object(struct hlsl_ctx *ctx, char type, uint32_t index) +{ + const struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) + { + if (var->last_read && var->reg_reservation.type == type && var->reg_reservation.index == index) + return var; + } + return NULL; +} + +static const struct object_type_info +{ + enum hlsl_base_type type; + char reg_name; +} +object_types[] = +{ + { HLSL_TYPE_SAMPLER, 's' }, + { HLSL_TYPE_TEXTURE, 't' }, +}; + +static const struct object_type_info *get_object_type_info(enum hlsl_base_type type) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(object_types); ++i) + if (type == object_types[i].type) + return &object_types[i]; + + WARN("No type info for 
object type %u.\n", type); + return NULL; +} + +static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type) +{ + const struct object_type_info *type_info = get_object_type_info(type); + struct hlsl_ir_var *var; + uint32_t index = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->last_read || var->data_type->type != HLSL_CLASS_OBJECT + || var->data_type->base_type != type) + continue; + + if (var->reg_reservation.type == type_info->reg_name) + { + const struct hlsl_ir_var *reserved_object = get_reserved_object(ctx, type_info->reg_name, + var->reg_reservation.index); + + if (reserved_object && reserved_object != var) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "Multiple objects bound to %c%u.", type_info->reg_name, + var->reg_reservation.index); + hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, + "Object '%s' is already bound to %c%u.", reserved_object->name, + type_info->reg_name, var->reg_reservation.index); + } + + var->reg.id = var->reg_reservation.index; + var->reg.allocated = true; + TRACE("Allocated reserved %s to %c%u.\n", var->name, type_info->reg_name, var->reg_reservation.index); + } + else if (!var->reg_reservation.type) + { + while (get_reserved_object(ctx, type_info->reg_name, index)) + ++index; + + var->reg.id = index; + var->reg.allocated = true; + TRACE("Allocated object to %c%u.\n", type_info->reg_name, index); + ++index; + } + else + { + struct vkd3d_string_buffer *type_string; + + type_string = hlsl_type_to_string(ctx, var->data_type); + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Object of type '%s' must be bound to register type '%c'.", + type_string->buffer, type_info->reg_name); + hlsl_release_string_buffer(ctx, type_string); + } + } +} + +static bool type_is_single_reg(const struct hlsl_type *type) +{ + return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; +} + +bool 
hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) +{ + struct hlsl_ir_node *offset_node = deref->offset.node; + + if (!offset_node) + { + *offset = 0; + return true; + } + + /* We should always have generated a cast to UINT. */ + assert(offset_node->data_type->type == HLSL_CLASS_SCALAR + && offset_node->data_type->base_type == HLSL_TYPE_UINT); + + if (offset_node->type != HLSL_IR_CONSTANT) + return false; + + *offset = hlsl_ir_constant(offset_node)->value[0].u; + + if (*offset >= deref->var->data_type->reg_size) + { + hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Dereference is out of bounds."); + return false; + } + + return true; +} + +unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{ + unsigned int offset; + + if (hlsl_offset_from_deref(ctx, deref, &offset)) + return offset; + + hlsl_fixme(ctx, &deref->offset.node->loc, "Dereference with non-constant offset of type %s.", + hlsl_node_type_to_string(deref->offset.node->type)); + + return 0; +} + +struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct hlsl_type *type) +{ + const struct hlsl_ir_var *var = deref->var; + struct hlsl_reg ret = var->reg; + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + ret.id += offset / 4; + + if (type_is_single_reg(var->data_type)) + { + assert(!offset); + ret.writemask = var->reg.writemask; + } + else + { + assert(type_is_single_reg(type)); + ret.writemask = ((1 << type->dimx) - 1) << (offset % 4); + } + return ret; +} + +int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) +{ + struct hlsl_block *const body = &entry_func->body; + struct hlsl_ir_var *var; + bool progress; + + list_move_head(&body->instrs, &ctx->static_initializers); + + LIST_FOR_EACH_ENTRY(var, 
&ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (var->modifiers & HLSL_STORAGE_UNIFORM) + prepend_uniform_copy(ctx, &body->instrs, var); + } + + LIST_FOR_EACH_ENTRY(var, entry_func->parameters, struct hlsl_ir_var, param_entry) + { + if (var->data_type->type == HLSL_CLASS_OBJECT || (var->modifiers & HLSL_STORAGE_UNIFORM)) + { + prepend_uniform_copy(ctx, &body->instrs, var); + } + else + { + if (var->data_type->type != HLSL_CLASS_STRUCT && !var->semantic.name) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Parameter \"%s\" is missing a semantic.", var->name); + + if (var->modifiers & HLSL_STORAGE_IN) + prepend_input_var_copy(ctx, &body->instrs, var); + if (var->modifiers & HLSL_STORAGE_OUT) + append_output_var_copy(ctx, &body->instrs, var); + } + } + if (entry_func->return_var) + { + if (entry_func->return_var->data_type->type != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); + + append_output_var_copy(ctx, &body->instrs, entry_func->return_var); + } + + transform_ir(ctx, lower_broadcasts, body, NULL); + while (transform_ir(ctx, fold_redundant_casts, body, NULL)); + do + { + progress = transform_ir(ctx, split_array_copies, body, NULL); + progress |= transform_ir(ctx, split_struct_copies, body, NULL); + } + while (progress); + transform_ir(ctx, lower_narrowing_casts, body, NULL); + do + { + progress = transform_ir(ctx, hlsl_fold_constants, body, NULL); + progress |= copy_propagation_execute(ctx, body); + progress |= transform_ir(ctx, remove_trivial_swizzles, body, NULL); + } + while (progress); + + if (ctx->profile->major_version < 4) + transform_ir(ctx, lower_division, body, NULL); + + do + compute_liveness(ctx, entry_func); + while (transform_ir(ctx, dce, body, NULL)); + + compute_liveness(ctx, entry_func); + + if (TRACE_ON()) + 
rb_for_each_entry(&ctx->functions, dump_function, ctx); + + allocate_temp_registers(ctx, entry_func); + if (ctx->profile->major_version < 4) + { + allocate_const_registers(ctx, entry_func); + } + else + { + allocate_buffers(ctx); + allocate_objects(ctx, HLSL_TYPE_TEXTURE); + } + allocate_semantic_registers(ctx); + allocate_objects(ctx, HLSL_TYPE_SAMPLER); + + if (ctx->result) + return ctx->result; + + switch (target_type) + { + case VKD3D_SHADER_TARGET_D3D_BYTECODE: + return hlsl_sm1_write(ctx, entry_func, out); + + case VKD3D_SHADER_TARGET_DXBC_TPF: + return hlsl_sm4_write(ctx, entry_func, out); + + default: + ERR("Unsupported shader target type %#x.\n", target_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c new file mode 100644 index 00000000000..51cee179e37 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -0,0 +1,285 @@ +/* + * HLSL constant value operations for constant folding + * + * Copyright 2022 Francisco Casas for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "hlsl.h" + +static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) +{ + unsigned int k; + uint32_t u; + int32_t i; + double d; + float f; + bool b; + + if (dst->node.data_type->dimx != src->node.data_type->dimx + || dst->node.data_type->dimy != src->node.data_type->dimy) + { + FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), + debug_hlsl_type(ctx, dst->node.data_type)); + return false; + } + + for (k = 0; k < 4; ++k) + { + switch (src->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + u = src->value[k].f; + i = src->value[k].f; + f = src->value[k].f; + d = src->value[k].f; + b = src->value[k].f; + break; + + case HLSL_TYPE_DOUBLE: + u = src->value[k].d; + i = src->value[k].d; + f = src->value[k].d; + d = src->value[k].d; + b = src->value[k].d; + break; + + case HLSL_TYPE_INT: + u = src->value[k].i; + i = src->value[k].i; + f = src->value[k].i; + d = src->value[k].i; + b = src->value[k].i; + break; + + case HLSL_TYPE_UINT: + u = src->value[k].u; + i = src->value[k].u; + f = src->value[k].u; + d = src->value[k].u; + b = src->value[k].u; + break; + + case HLSL_TYPE_BOOL: + u = src->value[k].b; + i = src->value[k].b; + f = src->value[k].b; + d = src->value[k].b; + b = src->value[k].b; + break; + + default: + FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), + debug_hlsl_type(ctx, dst->node.data_type)); + return false; + } + + switch (dst->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->value[k].f = f; + break; + + case HLSL_TYPE_DOUBLE: + dst->value[k].d = d; + break; + + case HLSL_TYPE_INT: + dst->value[k].i = i; + break; + + case HLSL_TYPE_UINT: + dst->value[k].u = u; + 
break; + + case HLSL_TYPE_BOOL: + dst->value[k].b = b; + break; + + default: + FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), + debug_hlsl_type(ctx, dst->node.data_type)); + return false; + } + } + return true; +} + +static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst->node.data_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < 4; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->value[k].f = -src->value[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->value[k].d = -src->value[k].d; + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + dst->value[k].u = -src->value[k].u; + break; + + default: + FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + return false; + } + } + return true; +} + +static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src1, + struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst->node.data_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + + for (k = 0; k < 4; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->value[k].f = src1->value[k].f + src2->value[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->value[k].d = src1->value[k].d + src2->value[k].d; + break; + + /* Handling HLSL_TYPE_INT through the unsigned field to avoid + * undefined behavior with signed integers in C. 
*/ + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + dst->value[k].u = src1->value[k].u + src2->value[k].u; + break; + + default: + FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + return false; + } + } + return true; +} + +static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, + struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst->node.data_type->base_type; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + + for (int k = 0; k < 4; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->value[k].f = src1->value[k].f * src2->value[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->value[k].d = src1->value[k].d * src2->value[k].d; + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + dst->value[k].u = src1->value[k].u * src2->value[k].u; + break; + + default: + FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + return false; + } + } + return true; +} + +bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_constant *arg1, *arg2 = NULL, *res; + struct hlsl_ir_expr *expr; + unsigned int i; + bool success; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + + for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) + { + if (expr->operands[i].node && expr->operands[i].node->type != HLSL_IR_CONSTANT) + return false; + } + arg1 = hlsl_ir_constant(expr->operands[0].node); + if (expr->operands[1].node) + arg2 = hlsl_ir_constant(expr->operands[1].node); + + if (!(res = hlsl_alloc(ctx, sizeof(*res)))) + return false; + init_node(&res->node, HLSL_IR_CONSTANT, instr->data_type, instr->loc); + + switch (expr->op) + { + case HLSL_OP1_CAST: + success = fold_cast(ctx, res, arg1); + break; + + case HLSL_OP1_NEG: + success = fold_neg(ctx, res, arg1); + break; + + case HLSL_OP2_ADD: + 
success = fold_add(ctx, res, arg1, arg2); + break; + + case HLSL_OP2_MUL: + success = fold_mul(ctx, res, arg1, arg2); + break; + + default: + FIXME("Fold \"%s\" expression.\n", debug_hlsl_expr_op(expr->op)); + success = false; + break; + } + + if (success) + { + list_add_before(&expr->node.entry, &res->node.entry); + hlsl_replace_node(&expr->node, &res->node); + } + else + { + vkd3d_free(res); + } + return success; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c new file mode 100644 index 00000000000..90ec6058c4a --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c @@ -0,0 +1,849 @@ +/* + * HLSL code generation for DXBC shader models 1-3 + * + * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "hlsl.h" +#include + +bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) +{ + unsigned int i; + + static const struct + { + const char *semantic; + bool output; + enum vkd3d_shader_type shader_type; + unsigned int major_version; + D3DSHADER_PARAM_REGISTER_TYPE type; + DWORD offset; + } + register_table[] = + { + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, 
VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { + if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type + && ctx->profile->major_version == register_table[i].major_version) + { + *type = register_table[i].type; + if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) + *reg = register_table[i].offset; + else + *reg = semantic->index; + return true; + } + } + + return false; +} + +bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) +{ + static const struct + { + const char *name; + D3DDECLUSAGE usage; + } + semantics[] = + { + {"binormal", D3DDECLUSAGE_BINORMAL}, + {"blendindices", D3DDECLUSAGE_BLENDINDICES}, + {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, + {"color", D3DDECLUSAGE_COLOR}, + {"depth", D3DDECLUSAGE_DEPTH}, + {"fog", D3DDECLUSAGE_FOG}, + {"normal", D3DDECLUSAGE_NORMAL}, + {"position", D3DDECLUSAGE_POSITION}, + {"positiont", D3DDECLUSAGE_POSITIONT}, + {"psize", D3DDECLUSAGE_PSIZE}, + {"sample", D3DDECLUSAGE_SAMPLE}, + {"sv_depth", D3DDECLUSAGE_DEPTH}, + {"sv_position", D3DDECLUSAGE_POSITION}, + {"sv_target", D3DDECLUSAGE_COLOR}, + {"tangent", D3DDECLUSAGE_TANGENT}, + {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, + {"texcoord", D3DDECLUSAGE_TEXCOORD}, + }; + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { + if 
(!ascii_strcasecmp(semantic->name, semantics[i].name)) + { + *usage = semantics[i].usage; + *usage_idx = semantic->index; + return true; + } + } + + return false; +} + +static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) +{ + if (type == VKD3D_SHADER_TYPE_VERTEX) + return D3DVS_VERSION(major, minor); + else + return D3DPS_VERSION(major, minor); +} + +static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) +{ + switch (type->type) + { + case HLSL_CLASS_ARRAY: + return sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else + return D3DXPC_MATRIX_ROWS; + case HLSL_CLASS_OBJECT: + return D3DXPC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3DXPC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3DXPC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3DXPC_VECTOR; + default: + ERR("Invalid class %#x.\n", type->type); + assert(0); + return 0; + } +} + +static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; + case HLSL_TYPE_PIXELSHADER: + return D3DXPT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_SAMPLER; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + } + break; + case HLSL_TYPE_STRING: + return D3DXPT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_TEXTURE1D; + case 
HLSL_SAMPLER_DIM_2D: + return D3DXPT_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_TEXTURE; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + } + break; + case HLSL_TYPE_VERTEXSHADER: + return D3DXPT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3DXPT_VOID; + default: + assert(0); + } + assert(0); + return 0; +} + +static const struct hlsl_type *get_array_type(const struct hlsl_type *type) +{ + if (type->type == HLSL_CLASS_ARRAY) + return get_array_type(type->e.array.type); + return type; +} + +static unsigned int get_array_size(const struct hlsl_type *type) +{ + if (type->type == HLSL_CLASS_ARRAY) + return get_array_size(type->e.array.type) * type->e.array.elements_count; + return 1; +} + +static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +{ + const struct hlsl_type *array_type = get_array_type(type); + unsigned int array_size = get_array_size(type); + struct hlsl_struct_field *field; + unsigned int field_count = 0; + size_t fields_offset = 0; + + if (type->bytecode_offset) + return; + + if (array_type->type == HLSL_CLASS_STRUCT) + { + LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry) + { + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm1_type(buffer, field->type, ctab_start); + } + + fields_offset = bytecode_get_size(buffer) - ctab_start; + + LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry) + { + put_u32(buffer, field->name_bytecode_offset - ctab_start); + put_u32(buffer, field->type->bytecode_offset - ctab_start); + ++field_count; + } + } + + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(type))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + 
put_u32(buffer, fields_offset); +} + +static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + if (strcmp(to_sort->name, var->name) < 0) + { + list_add_before(&var->extern_entry, &to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sm1_sort_externs(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + sm1_sort_extern(&sorted, var); + list_move_tail(&ctx->extern_vars, &sorted); +} + +static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + struct hlsl_ir_function_decl *entry_func) +{ + size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; + unsigned int uniform_count = 0; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->semantic.name && var->reg.allocated) + { + ++uniform_count; + + if (var->is_param && var->is_uniform) + { + struct vkd3d_string_buffer *name; + + if (!(name = hlsl_get_string_buffer(ctx))) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + vkd3d_string_buffer_printf(name, "$%s", var->name); + vkd3d_free((char *)var->name); + var->name = hlsl_strdup(ctx, name->buffer); + hlsl_release_string_buffer(ctx, name); + } + } + } + + sm1_sort_externs(ctx); + + size_offset = put_u32(buffer, 0); + ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); + + ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); + creator_offset = put_u32(buffer, 0); + put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, uniform_count); + put_u32(buffer, 
sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ + put_u32(buffer, 0); /* FIXME: flags */ + put_u32(buffer, 0); /* FIXME: target string */ + + vars_start = bytecode_get_size(buffer); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->semantic.name && var->reg.allocated) + { + put_u32(buffer, 0); /* name */ + if (var->data_type->type == HLSL_CLASS_OBJECT + && (var->data_type->base_type == HLSL_TYPE_SAMPLER + || var->data_type->base_type == HLSL_TYPE_TEXTURE)) + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->reg.id)); + put_u32(buffer, 1); + } + else + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->reg.id)); + put_u32(buffer, var->data_type->reg_size / 4); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ + } + } + + uniform_count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->semantic.name && var->reg.allocated) + { + size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); + size_t name_offset; + + name_offset = put_string(buffer, var->name); + set_u32(buffer, var_offset, name_offset - ctab_start); + + write_sm1_type(buffer, var->data_type, ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + ++uniform_count; + } + } + + offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(buffer, creator_offset, offset - ctab_start); + + ctab_end = bytecode_get_size(buffer); + set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); +} + +static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) +{ + return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) + | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); +} + +struct sm1_instruction +{ + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; + + struct sm1_dst_register + { + 
D3DSHADER_PARAM_REGISTER_TYPE type; + D3DSHADER_PARAM_DSTMOD_TYPE mod; + unsigned int writemask; + uint32_t reg; + } dst; + + struct sm1_src_register + { + D3DSHADER_PARAM_REGISTER_TYPE type; + D3DSHADER_PARAM_SRCMOD_TYPE mod; + unsigned int swizzle; + uint32_t reg; + } srcs[2]; + unsigned int src_count; + + unsigned int has_dst; +}; + +static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) +{ + assert(reg->writemask); + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); +} + +static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, + const struct sm1_src_register *reg, unsigned int dst_writemask) +{ + unsigned int swizzle = hlsl_map_swizzle(reg->swizzle, dst_writemask); + + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (swizzle << 16) | reg->reg); +} + +static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct sm1_instruction *instr) +{ + uint32_t token = instr->opcode; + unsigned int i; + + if (ctx->profile->major_version > 1) + token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + if (instr->has_dst) + write_sm1_dst_register(buffer, &instr->dst); + + for (i = 0; i < instr->src_count; ++i) + write_sm1_src_register(buffer, &instr->srcs[i], instr->dst.writemask); +}; + +static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2) +{ + const struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = 
D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod) +{ + const struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .srcs[0].mod = src_mod, + .src_count = 1, + }; + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + unsigned int i, x; + + for (i = 0; i < ctx->constant_defs.count; ++i) + { + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { + .type = D3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = i, + }; + + if (ctx->profile->major_version > 1) + token |= 5 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + write_sm1_dst_register(buffer, ®); + for (x = 0; x < 4; ++x) + put_f32(buffer, ctx->constant_defs.values[i].f[x]); + } +} + +static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool output) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, usage_idx; + D3DDECLUSAGE usage; + bool ret; + + if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) + { + usage = 0; + usage_idx = 0; + } + else + { + ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); + assert(ret); + reg.type = output ? 
D3DSPR_OUTPUT : D3DSPR_INPUT; + reg.reg = var->reg.id; + } + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + token = (1u << 31); + token |= usage << D3DSP_DCL_USAGE_SHIFT; + token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; + put_u32(buffer, token); + + reg.writemask = (1 << var->data_type->dimx) - 1; + write_sm1_dst_register(buffer, ®); +} + +static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + bool write_in = false, write_out = false; + struct hlsl_ir_var *var; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + write_in = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) + write_in = write_out = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) + write_in = true; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (write_in && var->is_input_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, false); + if (write_out && var->is_output_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, true); + } +} + +static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_CONST, + .srcs[0].reg = constant->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(constant->reg.allocated); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + struct 
hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + struct hlsl_ir_node *arg2 = expr->operands[1].node; + unsigned int i; + + assert(instr->reg.allocated); + + if (instr->data_type->base_type != HLSL_TYPE_FLOAT) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); + return; + } + + switch (expr->op) + { + case HLSL_OP1_NEG: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG); + break; + + case HLSL_OP1_RCP: + for (i = 0; i < instr->data_type->dimx; ++i) + { + struct hlsl_reg src = arg1->reg, dst = instr->reg; + + src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); + dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); + write_sm1_unary_op(ctx, buffer, D3DSIO_RCP, &dst, &src, 0); + } + break; + + case HLSL_OP2_ADD: + write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MAX: + write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MIN: + write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MUL: + write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); + break; + } +} + +static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_load *load = hlsl_ir_load(instr); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src, instr->data_type); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = reg.id, + .srcs[0].swizzle = 
hlsl_swizzle_from_writemask(reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + + if (load->src.var->is_uniform) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_CONST; + } + else if (load->src.var->is_input_semantic) + { + if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, + false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_INPUT; + sm1_instr.srcs[0].reg = reg.id; + } + else + sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); + } + + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_store *store = hlsl_ir_store(instr); + const struct hlsl_ir_node *rhs = store->rhs.node; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs, rhs->data_type); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = reg.id, + .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = rhs->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), + .src_count = 1, + }; + + if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX) + { + FIXME("Matrix writemasks need to be lowered.\n"); + return; + } + + if (store->lhs.var->is_output_semantic) + { + if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, + true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + { + assert(reg.allocated); + sm1_instr.dst.type = D3DSPR_OUTPUT; + sm1_instr.dst.reg = reg.id; + } + else + sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; + } + else + assert(reg.allocated); + + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + const struct hlsl_ir_node *val = swizzle->val.node; + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = val->reg.id, + .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), + swizzle->swizzle, instr->data_type->dimx), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(val->reg.allocated); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_function_decl *entry_func) +{ + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->type == HLSL_CLASS_MATRIX) + { + /* These need to be lowered. 
*/ + hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); + continue; + } + else if (instr->data_type->type == HLSL_CLASS_OBJECT) + { + hlsl_fixme(ctx, &instr->loc, "Object copy.\n"); + break; + } + + assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); + } + + switch (instr->type) + { + case HLSL_IR_CONSTANT: + write_sm1_constant(ctx, buffer, instr); + break; + + case HLSL_IR_EXPR: + write_sm1_expr(ctx, buffer, instr); + break; + + case HLSL_IR_LOAD: + write_sm1_load(ctx, buffer, instr); + break; + + case HLSL_IR_STORE: + write_sm1_store(ctx, buffer, instr); + break; + + case HLSL_IR_SWIZZLE: + write_sm1_swizzle(ctx, buffer, instr); + break; + + default: + FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type)); + } + } +} + +int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + int ret; + + put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + + write_sm1_uniforms(ctx, &buffer, entry_func); + + write_sm1_constant_defs(ctx, &buffer); + write_sm1_semantic_dcls(ctx, &buffer); + write_sm1_instructions(ctx, &buffer, entry_func); + + put_u32(&buffer, D3DSIO_END); + + if (!(ret = buffer.status)) + { + out->code = buffer.data; + out->size = buffer.size; + } + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c new file mode 100644 index 00000000000..fc07e6ea788 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c @@ -0,0 +1,2102 @@ +/* + * HLSL code generation for DXBC shader models 4-5 + * + * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the 
License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "hlsl.h" +#include +#include "d3dcommon.h" +#include "sm4.h" + +static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); + +bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) +{ + unsigned int i; + + static const struct + { + const char *semantic; + bool output; + enum vkd3d_shader_type shader_type; + enum vkd3d_sm4_register_type type; + enum vkd3d_sm4_swizzle_type swizzle_type; + bool has_idx; + } + register_table[] = + { + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_RT_PRIMID, VKD3D_SM4_SWIZZLE_NONE, false}, + + /* Put sv_target in this table, instead of letting it fall through to + * default varying allocation, so that the register index matches the + * usage index. 
*/ + {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_OUTPUT, VKD3D_SM4_SWIZZLE_VEC4, true}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4, false}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4, false}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_RT_OUTPUT, VKD3D_SM4_SWIZZLE_VEC4, true}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { + if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type) + { + *type = register_table[i].type; + if (swizzle_type) + *swizzle_type = register_table[i].swizzle_type; + *has_idx = register_table[i].has_idx; + return true; + } + } + + return false; +} + +bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3D_NAME *usage) +{ + unsigned int i; + + static const struct + { + const char *name; + bool output; + enum vkd3d_shader_type shader_type; + D3DDECLUSAGE usage; + } + semantics[] = + { + {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, + + {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, + + {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + + {"sv_position", false, 
VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, + + {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, + }; + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { + if (!ascii_strcasecmp(semantic->name, semantics[i].name) + && output == semantics[i].output + && ctx->profile->type == semantics[i].shader_type + && !ascii_strncasecmp(semantic->name, "sv_", 3)) + { + *usage = semantics[i].usage; + return true; + } + } + + if (!ascii_strncasecmp(semantic->name, "sv_", 3)) + return false; + + *usage = D3D_NAME_UNDEFINED; + return true; +} + +static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + struct vkd3d_string_buffer *string; + const struct hlsl_ir_var *var; + size_t count_position; + unsigned int i; + bool ret; + + count_position = put_u32(&buffer, 0); + put_u32(&buffer, 8); /* unknown */ + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; + enum vkd3d_sm4_register_type type; + uint32_t usage_idx, reg_idx; + D3D_NAME usage; + bool has_idx; + + if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) + continue; + + ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + assert(ret); + usage_idx = var->semantic.index; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) + { + reg_idx = has_idx ? var->semantic.index : ~0u; + } + else + { + assert(var->reg.allocated); + type = VKD3D_SM4_RT_INPUT; + reg_idx = var->reg.id; + } + + use_mask = width; /* FIXME: accurately report use mask */ + if (output) + use_mask = 0xf ^ use_mask; + + /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). 
*/ + if (usage >= 64) + usage = 0; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, usage_idx); + put_u32(&buffer, usage); + switch (var->data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); + break; + + case HLSL_TYPE_INT: + put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); + break; + + default: + if ((string = hlsl_type_to_string(ctx, var->data_type))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Invalid data type %s for semantic variable %s.", string->buffer, var->name); + hlsl_release_string_buffer(ctx, string); + put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); + } + put_u32(&buffer, reg_idx); + put_u32(&buffer, vkd3d_make_u16(width, use_mask)); + } + + i = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + const char *semantic = var->semantic.name; + size_t string_offset; + D3D_NAME usage; + + if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) + continue; + + hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + + if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) + string_offset = put_string(&buffer, "SV_Target"); + else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) + string_offset = put_string(&buffer, "SV_Depth"); + else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) + string_offset = put_string(&buffer, "SV_Position"); + else + string_offset = put_string(&buffer, semantic); + set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); + } + + set_u32(&buffer, count_position, i); + + dxbc_writer_add_section(dxbc, output ? 
TAG_OSGN : TAG_ISGN, buffer.data, buffer.size); +} + +static const struct hlsl_type *get_array_type(const struct hlsl_type *type) +{ + if (type->type == HLSL_CLASS_ARRAY) + return get_array_type(type->e.array.type); + return type; +} + +static unsigned int get_array_size(const struct hlsl_type *type) +{ + if (type->type == HLSL_CLASS_ARRAY) + return get_array_size(type->e.array.type) * type->e.array.elements_count; + return 1; +} + +static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +{ + switch (type->type) + { + case HLSL_CLASS_ARRAY: + return sm4_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3D_SVC_MATRIX_COLUMNS; + else + return D3D_SVC_MATRIX_ROWS; + case HLSL_CLASS_OBJECT: + return D3D_SVC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3D_SVC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3D_SVC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3D_SVC_VECTOR; + default: + ERR("Invalid class %#x.\n", type->type); + assert(0); + return 0; + } +} + +static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + return D3D_SVT_BOOL; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3D_SVT_FLOAT; + case HLSL_TYPE_INT: + return D3D_SVT_INT; + case HLSL_TYPE_PIXELSHADER: + return D3D_SVT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SVT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SVT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SVT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SVT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3D_SVT_SAMPLER; + default: + assert(0); + } + break; + case HLSL_TYPE_STRING: + return D3D_SVT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SVT_TEXTURE1D; + case 
HLSL_SAMPLER_DIM_2D: + return D3D_SVT_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SVT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SVT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3D_SVT_TEXTURE; + default: + assert(0); + } + break; + case HLSL_TYPE_UINT: + return D3D_SVT_UINT; + case HLSL_TYPE_VERTEXSHADER: + return D3D_SVT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3D_SVT_VOID; + default: + assert(0); + } + assert(0); + return 0; +} + +static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) +{ + const struct hlsl_type *array_type = get_array_type(type); + const char *name = array_type->name ? array_type->name : ""; + const struct hlsl_profile_info *profile = ctx->profile; + unsigned int field_count = 0, array_size = 0; + size_t fields_offset = 0, name_offset = 0; + struct hlsl_struct_field *field; + + if (type->bytecode_offset) + return; + + if (profile->major_version >= 5) + name_offset = put_string(buffer, name); + + if (type->type == HLSL_CLASS_ARRAY) + array_size = get_array_size(type); + + if (array_type->type == HLSL_CLASS_STRUCT) + { + LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry) + { + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm4_type(ctx, buffer, field->type); + } + + fields_offset = bytecode_get_size(buffer); + + LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry) + { + put_u32(buffer, field->name_bytecode_offset); + put_u32(buffer, field->type->bytecode_offset); + put_u32(buffer, field->reg_offset); + ++field_count; + } + } + + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); + + if (profile->major_version >= 5) + { + put_u32(buffer, 0); /* FIXME: unknown */ + 
put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, name_offset); + } +} + +static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_SAMPLER: + return D3D_SIT_SAMPLER; + case HLSL_TYPE_TEXTURE: + return D3D_SIT_TEXTURE; + default: + assert(0); + return 0; + } +} + +static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) +{ + switch (type->e.resource_format->base_type) + { + case HLSL_TYPE_DOUBLE: + return D3D_RETURN_TYPE_DOUBLE; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3D_RETURN_TYPE_FLOAT; + + case HLSL_TYPE_INT: + return D3D_RETURN_TYPE_SINT; + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + return D3D_RETURN_TYPE_UINT; + + default: + assert(0); + return 0; + } +} + +static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) +{ + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SRV_DIMENSION_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SRV_DIMENSION_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SRV_DIMENSION_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SRV_DIMENSION_TEXTURECUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return D3D_SRV_DIMENSION_TEXTURE1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return D3D_SRV_DIMENSION_TEXTURE2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return D3D_SRV_DIMENSION_TEXTURE2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; + default: + assert(0); + return D3D_SRV_DIMENSION_UNKNOWN; + } +} + +static int sm4_compare_externs(const struct hlsl_ir_var *a, const struct hlsl_ir_var *b) +{ + if (a->data_type->base_type != b->data_type->base_type) + return a->data_type->base_type - b->data_type->base_type; + if (a->reg.allocated && b->reg.allocated) + return 
a->reg.id - b->reg.id; + return strcmp(a->name, b->name); +} + +static void sm4_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + if (sm4_compare_externs(to_sort, var) < 0) + { + list_add_before(&var->extern_entry, &to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sm4_sort_externs(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->data_type->type == HLSL_CLASS_OBJECT) + sm4_sort_extern(&sorted, var); + } + list_move_tail(&ctx->extern_vars, &sorted); +} + +static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +{ + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + size_t cbuffer_position, resource_position, creator_position; + unsigned int cbuffer_count = 0, resource_count = 0, i, j; + const struct hlsl_profile_info *profile = ctx->profile; + struct vkd3d_bytecode_buffer buffer = {0}; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + + static const uint16_t target_types[] = + { + 0xffff, /* PIXEL */ + 0xfffe, /* VERTEX */ + 0x4753, /* GEOMETRY */ + 0x4853, /* HULL */ + 0x4453, /* DOMAIN */ + 0x4353, /* COMPUTE */ + }; + + sm4_sort_externs(ctx); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->reg.allocated && var->data_type->type == HLSL_CLASS_OBJECT) + ++resource_count; + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + { + ++cbuffer_count; + ++resource_count; + } + } + + put_u32(&buffer, cbuffer_count); + cbuffer_position = put_u32(&buffer, 0); + put_u32(&buffer, resource_count); + resource_position = 
put_u32(&buffer, 0); + put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), + target_types[profile->type])); + put_u32(&buffer, 0); /* FIXME: compilation flags */ + creator_position = put_u32(&buffer, 0); + + if (profile->major_version >= 5) + { + put_u32(&buffer, TAG_RD11); + put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ + put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ + put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ + put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ + put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ + put_u32(&buffer, 0); /* unknown; possibly a null terminator */ + } + + /* Bound resources. */ + + resources_offset = bytecode_get_size(&buffer); + set_u32(&buffer, resource_position, resources_offset); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + uint32_t flags = 0; + + if (!var->reg.allocated || var->data_type->type != HLSL_CLASS_OBJECT) + continue; + + if (var->reg_reservation.type) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, sm4_resource_type(var->data_type)); + if (var->data_type->base_type == HLSL_TYPE_SAMPLER) + { + put_u32(&buffer, 0); + put_u32(&buffer, 0); + put_u32(&buffer, 0); + } + else + { + put_u32(&buffer, sm4_resource_format(var->data_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } + put_u32(&buffer, var->reg.id); + put_u32(&buffer, 1); /* bind count */ + put_u32(&buffer, flags); + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + uint32_t flags = 0; + + if (!cbuffer->reg.allocated) + continue; + + if 
(cbuffer->reservation.type) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); + put_u32(&buffer, 0); /* return type */ + put_u32(&buffer, 0); /* dimension */ + put_u32(&buffer, 0); /* multisample count */ + put_u32(&buffer, cbuffer->reg.id); /* bind point */ + put_u32(&buffer, 1); /* bind count */ + put_u32(&buffer, flags); /* flags */ + } + + i = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->reg.allocated || var->data_type->type != HLSL_CLASS_OBJECT) + continue; + + string_offset = put_string(&buffer, var->name); + set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; + + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); + } + + assert(i == resource_count); + + /* Buffers. */ + + cbuffers_offset = bytecode_get_size(&buffer); + set_u32(&buffer, cbuffer_position, cbuffers_offset); + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + unsigned int var_count = 0; + + if (!cbuffer->reg.allocated) + continue; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + ++var_count; + } + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var_count); + put_u32(&buffer, 0); /* variable offset */ + put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); + put_u32(&buffer, 0); /* FIXME: flags */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; + + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + size_t vars_start = bytecode_get_size(&buffer); + + if (!cbuffer->reg.allocated) + continue; + + set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + { + uint32_t flags = 0; + + if (var->last_read) + flags |= D3D_SVF_USED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var->buffer_offset * sizeof(float)); + put_u32(&buffer, var->data_type->reg_size * sizeof(float)); + put_u32(&buffer, flags); + put_u32(&buffer, 0); /* type */ + put_u32(&buffer, 0); /* FIXME: default value */ + + if (profile->major_version >= 5) + { + put_u32(&buffer, 0); /* texture start */ + put_u32(&buffer, 0); /* texture count */ + put_u32(&buffer, 0); /* sampler start */ + put_u32(&buffer, 0); /* sampler count */ + } + } + } + + j = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + { + const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); + size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); + size_t string_offset = put_string(&buffer, var->name); + + set_u32(&buffer, var_offset, string_offset); + write_sm4_type(ctx, &buffer, var->data_type); + set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); + ++j; + } + } + } + + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(&buffer, creator_position, creator_offset); + + dxbc_writer_add_section(dxbc, TAG_RDEF, buffer.data, buffer.size); +} + +static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) +{ + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return VKD3D_SM4_RESOURCE_TEXTURE_1D; + case HLSL_SAMPLER_DIM_2D: + return VKD3D_SM4_RESOURCE_TEXTURE_2D; + case HLSL_SAMPLER_DIM_3D: + return VKD3D_SM4_RESOURCE_TEXTURE_3D; + case HLSL_SAMPLER_DIM_CUBE: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; + default: + assert(0); + return 0; + } +} + +struct sm4_instruction_modifier +{ + enum vkd3d_sm4_instruction_modifier type; + + union + { + struct + { + int u, v, w; + } aoffimmi; + } u; +}; + +static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) +{ + uint32_t word = 0; + + word |= VKD3D_SM4_MODIFIER_MASK & imod->type; + + switch (imod->type) + { + case VKD3D_SM4_MODIFIER_AOFFIMMI: + assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); + assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); + assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); + word |= ((uint32_t)imod->u.aoffimmi.u & 
0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; + break; + + default: + assert(0); + break; + } + + return word; +} + +struct sm4_register +{ + enum vkd3d_sm4_register_type type; + uint32_t idx[2]; + unsigned int idx_count; + enum vkd3d_sm4_dimension dim; + uint32_t immconst_uint[4]; + unsigned int mod; +}; + +struct sm4_instruction +{ + enum vkd3d_sm4_opcode opcode; + + struct sm4_instruction_modifier modifiers[1]; + unsigned int modifier_count; + + struct sm4_dst_register + { + struct sm4_register reg; + unsigned int writemask; + } dsts[2]; + unsigned int dst_count; + + struct sm4_src_register + { + struct sm4_register reg; + enum vkd3d_sm4_swizzle_type swizzle_type; + unsigned int swizzle; + } srcs[4]; + unsigned int src_count; + + uint32_t idx[2]; + unsigned int idx_count; +}; + +static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, + unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, + const struct hlsl_deref *deref, const struct hlsl_type *data_type) +{ + const struct hlsl_ir_var *var = deref->var; + + if (var->is_uniform) + { + if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE) + { + reg->type = VKD3D_SM4_RT_RESOURCE; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = var->reg.id; + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER) + { + reg->type = VKD3D_SM4_RT_SAMPLER; + reg->dim = VKD3D_SM4_DIMENSION_NONE; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + reg->idx[0] = var->reg.id; + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + + 
assert(data_type->type <= HLSL_CLASS_VECTOR); + reg->type = VKD3D_SM4_RT_CONSTBUFFER; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = var->buffer->reg.id; + reg->idx[1] = offset / 4; + reg->idx_count = 2; + *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); + } + } + else if (var->is_input_semantic) + { + bool has_idx; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, swizzle_type, &has_idx)) + { + if (has_idx) + { + reg->idx[0] = var->semantic.index; + reg->idx_count = 1; + } + + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *writemask = (1u << data_type->dimx) - 1; + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type); + + assert(hlsl_reg.allocated); + reg->type = VKD3D_SM4_RT_INPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else if (var->is_output_semantic) + { + bool has_idx; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, swizzle_type, &has_idx)) + { + if (has_idx) + { + reg->idx[0] = var->semantic.index; + reg->idx_count = 1; + } + + if (reg->type == VKD3D_SM4_RT_DEPTHOUT) + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + else + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *writemask = (1u << data_type->dimx) - 1; + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type); + + assert(hlsl_reg.allocated); + reg->type = VKD3D_SM4_RT_OUTPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + reg->idx[0] = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref, data_type); + + assert(hlsl_reg.allocated); + reg->type = VKD3D_SM4_RT_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = 
hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } +} + +static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, + const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) +{ + unsigned int writemask; + + sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); + if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} + +static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask, + enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) +{ + assert(instr->reg.allocated); + reg->type = VKD3D_SM4_RT_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = instr->reg.id; + reg->idx_count = 1; + *writemask = instr->reg.writemask; +} + +static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr) +{ + unsigned int swizzle_type; + + sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr); +} + +static void sm4_src_from_node(struct sm4_src_register *src, + const struct hlsl_ir_node *instr, unsigned int map_writemask) +{ + unsigned int writemask; + + sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); + if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} + +static uint32_t sm4_encode_register(const struct sm4_register *reg) +{ + return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) + | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) + | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); +} + +static uint32_t sm4_register_order(const struct sm4_register *reg) +{ + uint32_t order = 1; + if (reg->type == VKD3D_SM4_RT_IMMCONST) + order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 
4 : 1; + order += reg->idx_count; + if (reg->mod) + ++order; + return order; +} + +static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) +{ + uint32_t token = instr->opcode; + unsigned int size = 1, i, j; + + size += instr->modifier_count; + for (i = 0; i < instr->dst_count; ++i) + size += sm4_register_order(&instr->dsts[i].reg); + for (i = 0; i < instr->src_count; ++i) + size += sm4_register_order(&instr->srcs[i].reg); + size += instr->idx_count; + + token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); + + if (instr->modifier_count > 0) + token |= VKD3D_SM4_INSTRUCTION_MODIFIER; + put_u32(buffer, token); + + for (i = 0; i < instr->modifier_count; ++i) + { + token = sm4_encode_instruction_modifier(&instr->modifiers[i]); + if (instr->modifier_count > i + 1) + token |= VKD3D_SM4_INSTRUCTION_MODIFIER; + put_u32(buffer, token); + } + + for (i = 0; i < instr->dst_count; ++i) + { + token = sm4_encode_register(&instr->dsts[i].reg); + if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) + token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; + put_u32(buffer, token); + + for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) + put_u32(buffer, instr->dsts[i].reg.idx[j]); + } + + for (i = 0; i < instr->src_count; ++i) + { + token = sm4_encode_register(&instr->srcs[i].reg); + token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; + if (instr->srcs[i].reg.mod) + token |= VKD3D_SM4_EXTENDED_OPERAND; + put_u32(buffer, token); + + if (instr->srcs[i].reg.mod) + put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); + + for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) + put_u32(buffer, instr->srcs[i].reg.idx[j]); + + if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) + { + put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); + if (instr->srcs[i].reg.dim == 
VKD3D_SM4_DIMENSION_VEC4) + { + put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); + put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); + put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); + } + } + } + + for (j = 0; j < instr->idx_count; ++j) + put_u32(buffer, instr->idx[j]); +} + +static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + const struct hlsl_ir_node *texel_offset) +{ + struct sm4_instruction_modifier modif; + struct hlsl_ir_constant *offset; + + if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) + return false; + offset = hlsl_ir_constant(texel_offset); + + modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; + modif.u.aoffimmi.u = offset->value[0].i; + modif.u.aoffimmi.v = offset->value[1].i; + modif.u.aoffimmi.w = offset->value[2].i; + if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 + || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 + || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) + return false; + + instr->modifiers[instr->modifier_count++] = modif; + return true; +} + +static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) +{ + const struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + + .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, + .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, + .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, + .srcs[0].reg.idx_count = 2, + .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, + .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), + .src_count = 1, + }; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +{ + const struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + + .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, + .dsts[0].reg.idx = {var->reg.id}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + write_sm4_instruction(buffer, &instr); +} + +static void 
write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +{ + const struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_RESOURCE + | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT), + + .dsts[0].reg.type = VKD3D_SM4_RT_RESOURCE, + .dsts[0].reg.idx = {var->reg.id}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + + .idx[0] = sm4_resource_format(var->data_type) * 0x1111, + .idx_count = 1, + }; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +{ + const struct hlsl_profile_info *profile = ctx->profile; + const bool output = var->is_output_semantic; + D3D_NAME usage; + bool has_idx; + + struct sm4_instruction instr = + { + .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, + .dst_count = 1, + }; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) + { + if (has_idx) + { + instr.dsts[0].reg.idx[0] = var->semantic.index; + instr.dsts[0].reg.idx_count = 1; + } + else + { + instr.dsts[0].reg.idx_count = 0; + } + instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; + } + else + { + instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; + instr.dsts[0].reg.idx[0] = var->reg.id; + instr.dsts[0].reg.idx_count = 1; + instr.dsts[0].writemask = var->reg.writemask; + } + + if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) + instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + + hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + + if (var->is_input_semantic) + { + switch (usage) + { + case D3D_NAME_UNDEFINED: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; + break; + + case D3D_NAME_INSTANCE_ID: + case D3D_NAME_PRIMITIVE_ID: + case D3D_NAME_VERTEX_ID: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; + break; + + default: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; + break; + } + + if (profile->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode |= VKD3DSIM_LINEAR << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + } + else + { + if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; + else + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; + } + + switch (usage) + { + case D3D_NAME_COVERAGE: + case D3D_NAME_DEPTH: + case D3D_NAME_DEPTH_GREATER_EQUAL: + case D3D_NAME_DEPTH_LESS_EQUAL: + case D3D_NAME_TARGET: + case D3D_NAME_UNDEFINED: + break; + + default: + instr.idx_count = 1; + instr.idx[0] = usage; + break; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_TEMPS, + + .idx = {temp_count}, + .idx_count = 1, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_RET, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); + instr.srcs[0].reg.mod = src_mod; + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node 
*src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); + instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; + + sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_constant(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_constant *constant) +{ + const unsigned int dimx = constant->node.data_type->dimx; + struct sm4_instruction instr; + struct sm4_register *reg = &instr.srcs[0].reg; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_dst_from_node(&instr.dsts[0], &constant->node); + instr.dst_count = 1; + + instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + reg->type = VKD3D_SM4_RT_IMMCONST; + if (dimx == 1) + { + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = constant->value[0].u; + } + else + { + unsigned int i, j = 0; + + reg->dim = VKD3D_SM4_DIMENSION_VEC4; 
+ for (i = 0; i < 4; ++i) + { + if (instr.dsts[0].writemask & (1u << i)) + reg->immconst_uint[i] = constant->value[j++].u; + } + } + instr.src_count = 1, + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords) +{ + struct sm4_instruction instr; + unsigned int dim_count; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_LD; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + + /* Mipmap level is in the last component in the IR, but needs to be in the W + * component in the instruction. */ + dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); + if (dim_count == 1) + instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); + if (dim_count == 2) + instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); + + sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, + const struct hlsl_ir_node *coords, const struct hlsl_ir_node *texel_offset) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_SAMPLE; + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7.\n"); + return; + } + } + + 
sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 3; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_expr(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +{ + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_ir_node *arg2 = expr->operands[1].node; + + assert(expr->node.reg.allocated); + + switch (expr->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + { + switch (expr->op) + { + case HLSL_OP1_ABS: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); + break; + + case HLSL_OP1_CAST: + { + const struct hlsl_type *src_type = arg1->data_type; + + /* Narrowing casts were already lowered. 
*/ + assert(src_type->dimx == expr->node.data_type->dimx); + + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + hlsl_fixme(ctx, &expr->node.loc, "Casts from bool to float are not implemented.\n"); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "Casts from double to float are not implemented.\n"); + break; + + default: + break; + } + break; + } + + case HLSL_OP1_EXP2: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FLOOR: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOG2: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + break; + + case HLSL_OP1_NEG: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); + break; + + case HLSL_OP1_ROUND: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_SAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV + | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), + &expr->node, arg1, 0); + break; + + case HLSL_OP2_ADD: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_DIV: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MAX: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MIN: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MUL: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); + break; + + 
default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 float \"%s\" expression.", debug_hlsl_expr_op(expr->op)); + break; + } + break; + } + + case HLSL_TYPE_INT: + { + switch (expr->op) + { + case HLSL_OP1_CAST: + { + const struct hlsl_type *src_type = arg1->data_type; + + /* Narrowing casts were already lowered. */ + assert(src_type->dimx == expr->node.data_type->dimx); + + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from bool to int."); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); + break; + + default: + break; + } + break; + } + + case HLSL_OP1_NEG: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); + break; + + case HLSL_OP2_MAX: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MIN: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 int \"%s\" expression.", debug_hlsl_expr_op(expr->op)); + break; + } + break; + } + + case HLSL_TYPE_UINT: + { + switch (expr->op) + { + case HLSL_OP1_CAST: + { + const struct hlsl_type *src_type = arg1->data_type; + + /* Narrowing casts were already lowered. 
*/ + assert(src_type->dimx == expr->node.data_type->dimx); + + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from bool to uint.\n"); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.\n"); + break; + + default: + break; + } + break; + } + + case HLSL_OP2_MAX: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MIN: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MUL: + /* Using IMUL instead of UMUL because we're taking the low + * bits, and the native compiler generates IMUL. */ + write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 uint \"%s\" expression.\n", debug_hlsl_expr_op(expr->op)); + break; + } + break; + } + + case HLSL_TYPE_BOOL: + { + switch (expr->op) + { + case HLSL_OP2_EQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_NEQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); + 
break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_LESS: + { + const struct hlsl_type *src_type = arg1->data_type; + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_GEQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 bool \"%s\" expression.", debug_hlsl_expr_op(expr->op)); + break; + } + break; + } + + default: + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, expr->node.data_type))) + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", string->buffer); + hlsl_release_string_buffer(ctx, string); + break; + } + } +} + +static void write_sm4_if(struct hlsl_ctx *ctx, 
struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, + .src_count = 1, + }; + + assert(iff->condition.node->data_type->dimx == 1); + + sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &iff->then_instrs); + + if (!list_empty(&iff->else_instrs.instrs)) + { + instr.opcode = VKD3D_SM4_OP_ELSE; + instr.src_count = 0; + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &iff->else_instrs); + } + + instr.opcode = VKD3D_SM4_OP_ENDIF; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_load(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_dst_from_node(&instr.dsts[0], &load->node); + instr.dst_count = 1; + + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, load->node.data_type, instr.dsts[0].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_loop(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_LOOP, + }; + + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &loop->body); + + instr.opcode = VKD3D_SM4_OP_ENDLOOP; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, + const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) +{ + struct sm4_src_register *src; + struct sm4_instruction instr; + + memset(&instr, 0, 
sizeof(instr)); + + instr.opcode = VKD3D_SM4_OP_GATHER4; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); + + /* FIXME: Use an aoffimmi modifier if possible. */ + if (texel_offset) + { + instr.opcode = VKD3D_SM5_OP_GATHER4_PO; + sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); + } + + sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); + + src = &instr.srcs[instr.src_count++]; + sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; + src->swizzle = swizzle; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_resource_load(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + + if (load->sampler.var) + { + const struct hlsl_type *sampler_type = load->sampler.var->data_type; + + assert(sampler_type->type == HLSL_CLASS_OBJECT); + assert(sampler_type->base_type == HLSL_TYPE_SAMPLER); + assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC); + + if (!load->sampler.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return; + } + } + + if (!load->resource.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); + return; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, coords); + break; + + case HLSL_RESOURCE_SAMPLE: + if (!load->sampler.var) + hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample 
expression."); + write_sm4_sample(ctx, buffer, resource_type, &load->node, + &load->resource, &load->sampler, coords, texel_offset); + break; + + case HLSL_RESOURCE_GATHER_RED: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_GREEN: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_BLUE: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_ALPHA: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); + break; + } +} + +static void write_sm4_store(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) +{ + const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm4_instruction instr; + unsigned int writemask; + + if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX) + { + hlsl_fixme(ctx, &store->node.loc, "Store to a matrix variable.\n"); + return; + } + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); + instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_swizzle(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) +{ + struct sm4_instruction instr; + unsigned int writemask; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + 
sm4_dst_from_node(&instr.dsts[0], &swizzle->node); + instr.dst_count = 1; + + sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); + instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), + swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_block *block) +{ + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->type == HLSL_CLASS_MATRIX) + { + FIXME("Matrix operations need to be lowered.\n"); + break; + } + else if (instr->data_type->type == HLSL_CLASS_OBJECT) + { + hlsl_fixme(ctx, &instr->loc, "Object copy.\n"); + break; + } + + assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); + } + + switch (instr->type) + { + case HLSL_IR_CONSTANT: + write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); + break; + + case HLSL_IR_EXPR: + write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: + write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); + break; + + case HLSL_IR_LOAD: + write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_LOOP: + write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_STORE: + write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: + write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); + break; + + default: + FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type)); + } + } +} + +static void write_sm4_shdr(struct hlsl_ctx *ctx, + const struct hlsl_ir_function_decl 
*entry_func, struct dxbc_writer *dxbc) +{ + const struct hlsl_profile_info *profile = ctx->profile; + struct vkd3d_bytecode_buffer buffer = {0}; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + size_t token_count_position; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { + VKD3D_SM4_PS, + VKD3D_SM4_VS, + VKD3D_SM4_GS, + VKD3D_SM5_HS, + VKD3D_SM5_DS, + VKD3D_SM5_CS, + 0, /* EFFECT */ + 0, /* TEXTURE */ + VKD3D_SM4_LIB, + }; + + put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); + token_count_position = put_u32(&buffer, 0); + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + write_sm4_dcl_constant_buffer(&buffer, cbuffer); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) + { + if (!var->reg.allocated || var->data_type->type != HLSL_CLASS_OBJECT) + continue; + + if (var->data_type->base_type == HLSL_TYPE_SAMPLER) + write_sm4_dcl_sampler(&buffer, var); + else if (var->data_type->base_type == HLSL_TYPE_TEXTURE) + write_sm4_dcl_texture(&buffer, var); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) + write_sm4_dcl_semantic(ctx, &buffer, var); + } + + if (ctx->temp_count) + write_sm4_dcl_temps(&buffer, ctx->temp_count); + + write_sm4_block(ctx, &buffer, &entry_func->body); + + write_sm4_ret(&buffer); + + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + + dxbc_writer_add_section(dxbc, TAG_SHDR, buffer.data, buffer.size); +} + +int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +{ + struct dxbc_writer dxbc; + size_t i; + int ret; + + dxbc_writer_init(&dxbc); + + write_sm4_signature(ctx, &dxbc, false); + 
write_sm4_signature(ctx, &dxbc, true); + write_sm4_rdef(ctx, &dxbc); + write_sm4_shdr(ctx, entry_func, &dxbc); + + if (!(ret = ctx->result)) + ret = dxbc_writer_write(&dxbc, out); + for (i = 0; i < dxbc.section_count; ++i) + vkd3d_free((void *)dxbc.sections[i].data); + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h new file mode 100644 index 00000000000..4860cf5f90e --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h @@ -0,0 +1,148 @@ +/* + * HLSL preprocessor + * + * Copyright 2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SHADER_PREPROC_H +#define __VKD3D_SHADER_PREPROC_H + +#include "vkd3d_shader_private.h" +#include "wine/rbtree.h" + +struct preproc_if_state +{ + /* Are we currently in a "true" block? */ + bool current_true; + /* Have we seen a "true" block in this #if..#endif yet? */ + bool seen_true; + /* Have we seen an #else yet? 
*/ + bool seen_else; +}; + +struct preproc_buffer +{ + void *lexer_buffer; + struct vkd3d_shader_location location; +}; + +struct preproc_file +{ + struct preproc_buffer buffer; + struct vkd3d_shader_code code; + char *filename; + + struct preproc_if_state *if_stack; + size_t if_count, if_stack_size; +}; + +struct preproc_text +{ + struct vkd3d_string_buffer text; + struct vkd3d_shader_location location; +}; + +struct preproc_expansion +{ + struct preproc_buffer buffer; + const struct preproc_text *text; + /* Back-pointer to the macro, if this expansion a macro body. This is + * necessary so that argument tokens can be correctly replaced. */ + struct preproc_macro *macro; +}; + +struct preproc_macro +{ + struct rb_entry entry; + char *name; + + char **arg_names; + size_t arg_count; + struct preproc_text *arg_values; + + struct preproc_text body; +}; + +struct preproc_ctx +{ + const struct vkd3d_shader_preprocess_info *preprocess_info; + void *scanner; + + struct vkd3d_shader_message_context *message_context; + struct vkd3d_string_buffer buffer; + + struct preproc_file *file_stack; + size_t file_count, file_stack_size; + + struct preproc_expansion *expansion_stack; + size_t expansion_count, expansion_stack_size; + + struct rb_tree macros; + + /* It's possible to parse as many as two function-like macros at once: one + * in the main text, and another inside of #if directives. E.g. + * + * func1( + * #if func2(...) + * #endif + * ) + * + * It's not possible to parse more than two, however. In the case of nested + * calls like "func1(func2(...))", we store everything inside the outer + * parentheses as unparsed text, and then parse it once the argument is + * actually invoked. 
+ */ + struct preproc_func_state + { + struct preproc_macro *macro; + size_t arg_count; + enum + { + STATE_NONE = 0, + STATE_IDENTIFIER, + STATE_ARGS, + } state; + unsigned int paren_depth; + } text_func, directive_func; + + int current_directive; + + int lookahead_token; + + bool last_was_newline; + bool last_was_eof; + bool last_was_defined; + + bool error; +}; + +bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, char *name, char **arg_names, + size_t arg_count, const struct vkd3d_shader_location *body_loc, struct vkd3d_string_buffer *body); +void preproc_close_include(struct preproc_ctx *ctx, const struct vkd3d_shader_code *code); +struct preproc_macro *preproc_find_macro(struct preproc_ctx *ctx, const char *name); +void preproc_free_macro(struct preproc_macro *macro); +bool preproc_push_include(struct preproc_ctx *ctx, char *filename, const struct vkd3d_shader_code *code); +void preproc_warning(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(4, 5); + +static inline struct preproc_file *preproc_get_top_file(struct preproc_ctx *ctx) +{ + assert(ctx->file_count); + return &ctx->file_stack[ctx->file_count - 1]; +} + +#endif diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l new file mode 100644 index 00000000000..3c072a6988f --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -0,0 +1,823 @@ +/* + * HLSL preprocessor + * + * Copyright 2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +%{ + +#include "preproc.h" +#include "preproc.tab.h" + +#undef ERROR /* defined in wingdi.h */ + +#define YYSTYPE PREPROC_YYSTYPE +#define YYLTYPE PREPROC_YYLTYPE + +#define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) + +static void update_location(struct preproc_ctx *ctx); + +#define YY_USER_ACTION update_location(yyget_extra(yyscanner)); + +%} + +%option 8bit +%option bison-bridge +%option bison-locations +%option extra-type="struct preproc_ctx *" +%option never-interactive +%option noinput +%option nounput +%option noyy_top_state +%option noyywrap +%option prefix="preproc_yy" +%option reentrant +%option stack + + /* Because these can both be terminated by EOF, we need states for them. */ +%s C_COMMENT +%s CXX_COMMENT + +%s ERROR +%s INCLUDE +%s LINE + +NEWLINE \r?\n +WS [ \t] +IDENTIFIER [A-Za-z_][A-Za-z0-9_]* +INT_SUFFIX [uUlL]{0,2} + +%% + +"//" {yy_push_state(CXX_COMMENT, yyscanner);} +"/*" {yy_push_state(C_COMMENT, yyscanner);} +\\{NEWLINE} {} +\n { + yy_pop_state(yyscanner); + BEGIN(INITIAL); + return T_NEWLINE; + } +"*/" {yy_pop_state(yyscanner);} +<> {yy_pop_state(yyscanner);} +. 
{} + +(\\{NEWLINE}|[^\n])* {return T_STRING;} + +defined/\( {return T_DEFINED;} +defined {return T_DEFINED;} +{IDENTIFIER}/\( {return T_IDENTIFIER_PAREN;} +{IDENTIFIER} {return T_IDENTIFIER;} + +"<=" {return T_LE;} +">=" {return T_GE;} +"==" {return T_EQ;} +"!=" {return T_NE;} +"&&" {return T_AND;} +"||" {return T_OR;} + + /* We have no use for floats, but shouldn't parse them as integers. */ + +[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[hHfF]? {return T_TEXT;} +[0-9]+\.([eE][+-]?[0-9]+)?[hHfF]? {return T_TEXT;} +[0-9]+([eE][+-]?[0-9]+)?[hHfF] {return T_TEXT;} +[0-9]+[eE][+-]?[0-9]+ {return T_TEXT;} +0[xX][0-9a-fA-f]+{INT_SUFFIX} {return T_INTEGER;} +0[0-7]*{INT_SUFFIX} {return T_INTEGER;} +[1-9][0-9]*{INT_SUFFIX} {return T_INTEGER;} + +## {return T_CONCAT;} + +"++" {return T_TEXT;} +"--" {return T_TEXT;} +"<<"=? {return T_TEXT;} +">>"=? {return T_TEXT;} +[-+*/%&|^]= {return T_TEXT;} + + /* Native doesn't preserve these tokens when running the preprocessor on its + * own, but there's no good reason to emulate that difference yet. */ +[pv]s\.[123]\.[0-4x] {return T_TEXT;} + +\"[^"]*\" {return T_STRING;} +\<[^>]*\> {return T_STRING;} + + /* C strings (including escaped quotes). 
*/ +\"([^"\\]|\\.)*\" {return T_TEXT;} + +#{WS}*{IDENTIFIER} { + struct preproc_ctx *ctx = yyget_extra(yyscanner); + const char *p; + + if (!ctx->last_was_newline) + return T_HASHSTRING; + + for (p = yytext + 1; strchr(" \t", *p); ++p) + ; + + if (!strcmp(p, "error")) + { + BEGIN(ERROR); + return T_ERROR; + } + + if (!strcmp(p, "include")) + { + BEGIN(INCLUDE); + return T_INCLUDE; + } + + if (!strcmp(p, "line")) + { + BEGIN(LINE); + return T_LINE; + } + + if (!strcmp(p, "define")) + return T_DEFINE; + if (!strcmp(p, "elif")) + return T_ELIF; + if (!strcmp(p, "else")) + return T_ELSE; + if (!strcmp(p, "endif")) + return T_ENDIF; + if (!strcmp(p, "if")) + return T_IF; + if (!strcmp(p, "ifdef")) + return T_IFDEF; + if (!strcmp(p, "ifndef")) + return T_IFNDEF; + if (!strcmp(p, "pragma")) + return T_PRAGMA; + if (!strcmp(p, "undef")) + return T_UNDEF; + + preproc_warning(ctx, yyget_lloc(yyscanner), VKD3D_SHADER_WARNING_PP_UNKNOWN_DIRECTIVE, + "Ignoring unknown directive \"%s\".", yytext); + return T_TEXT; + } + +\\{NEWLINE} {} +{NEWLINE} { + BEGIN(INITIAL); + return T_NEWLINE; + } + +{WS}+ {} +[-()\[\]{},+!*/<>&|^?:] {return yytext[0];} +. {return T_TEXT;} + +%% + +static void update_location(struct preproc_ctx *ctx) +{ + struct preproc_buffer *buffer = &preproc_get_top_file(ctx)->buffer; + unsigned int i, leng = yyget_leng(ctx->scanner); + const char *text = yyget_text(ctx->scanner); + + /* We want to do this here, rather than before calling yylex(), because + * some tokens are skipped by the lexer. */ + + *yyget_lloc(ctx->scanner) = buffer->location; + + for (i = 0; i < leng; ++i) + { + ++buffer->location.column; + if (text[i] == '\n') + { + buffer->location.column = 1; + ++buffer->location.line; + } + } +} + +static bool preproc_is_writing(struct preproc_ctx *ctx) +{ + const struct preproc_file *file; + + /* This can happen while checking for unterminated macro invocation. 
*/ + if (!ctx->file_count) + return true; + file = preproc_get_top_file(ctx); + if (!file->if_count) + return true; + return file->if_stack[file->if_count - 1].current_true; +} + +static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) +{ + if (!ctx->expansion_count) + return NULL; + return ctx->expansion_stack[ctx->expansion_count - 1].macro; +} + +/* Concatenation is not done for object-like macros, but is done for both + * function-like macro bodies and their arguments. */ +static bool should_concat(struct preproc_ctx *ctx) +{ + struct preproc_macro *macro; + + if (!ctx->expansion_count) + return false; + macro = ctx->expansion_stack[ctx->expansion_count - 1].macro; + return !macro || macro->arg_count; +} + +static void preproc_pop_buffer(struct preproc_ctx *ctx) +{ + if (ctx->expansion_count) + { + struct preproc_expansion *exp = &ctx->expansion_stack[ctx->expansion_count - 1]; + + yy_delete_buffer(exp->buffer.lexer_buffer, ctx->scanner); + + --ctx->expansion_count; + TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count); + } + else + { + struct preproc_file *file = preproc_get_top_file(ctx); + + if (ctx->file_count > 1) + preproc_close_include(ctx, &file->code); + + if (file->if_count) + { + const struct vkd3d_shader_location loc = {.source_name = file->filename}; + + preproc_warning(ctx, &loc, VKD3D_SHADER_WARNING_PP_UNTERMINATED_IF, "Unterminated #if block."); + } + vkd3d_free(file->if_stack); + + vkd3d_free(file->filename); + + yy_delete_buffer(file->buffer.lexer_buffer, ctx->scanner); + + --ctx->file_count; + TRACE("File stack size is now %zu.\n", ctx->file_count); + } + + if (ctx->expansion_count) + yy_switch_to_buffer(ctx->expansion_stack[ctx->expansion_count - 1].buffer.lexer_buffer, ctx->scanner); + else if (ctx->file_count) + yy_switch_to_buffer(ctx->file_stack[ctx->file_count - 1].buffer.lexer_buffer, ctx->scanner); +} + +static int return_token(int token, YYSTYPE *lval, const char *text) +{ + switch (token) + { + case 
T_HASHSTRING: + case T_IDENTIFIER: + case T_IDENTIFIER_PAREN: + case T_INTEGER: + case T_STRING: + case T_TEXT: + if (!(lval->string = vkd3d_strdup(text))) + return 0; + break; + } + + return token; +} + +static const struct preproc_text *find_arg_expansion(struct preproc_ctx *ctx, const char *s) +{ + struct preproc_macro *macro; + unsigned int i; + + if ((macro = preproc_get_top_macro(ctx))) + { + for (i = 0; i < macro->arg_count; ++i) + { + if (!strcmp(s, macro->arg_names[i])) + return ¯o->arg_values[i]; + } + } + return NULL; +} + +static void preproc_text_add(struct preproc_text *text, const char *string) +{ + vkd3d_string_buffer_printf(&text->text, "%s", string); +} + +static bool preproc_push_expansion(struct preproc_ctx *ctx, + const struct preproc_text *text, struct preproc_macro *macro) +{ + struct preproc_expansion *exp; + + if (!vkd3d_array_reserve((void **)&ctx->expansion_stack, &ctx->expansion_stack_size, + ctx->expansion_count + 1, sizeof(*ctx->expansion_stack))) + return false; + exp = &ctx->expansion_stack[ctx->expansion_count++]; + exp->text = text; + exp->buffer.lexer_buffer = yy_scan_bytes(text->text.buffer, text->text.content_size, ctx->scanner); + exp->buffer.location = text->location; + exp->macro = macro; + TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count); + return true; +} + +int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) +{ + struct preproc_ctx *ctx = yyget_extra(scanner); + + for (;;) + { + struct preproc_func_state *func_state; + const char *text; + int token; + + if (ctx->lookahead_token) + { + token = ctx->lookahead_token; + text = yyget_text(scanner); + } + else + { + if (ctx->last_was_eof) + { + preproc_pop_buffer(ctx); + if (!ctx->file_count) + return 0; + } + ctx->last_was_eof = false; + + assert(ctx->file_count); + if (!(token = preproc_lexer_lex(lval, lloc, scanner))) + { + ctx->last_was_eof = true; + + /* If we have reached the end of an included file, inject a newline. 
*/ + if (ctx->expansion_count) + continue; + token = T_NEWLINE; + text = "\n"; + } + else + { + text = yyget_text(scanner); + } + + if (ctx->last_was_newline) + { + switch (token) + { + case T_DEFINE: + case T_ELIF: + case T_ELSE: + case T_ENDIF: + case T_ERROR: + case T_IF: + case T_IFDEF: + case T_IFNDEF: + case T_INCLUDE: + case T_LINE: + case T_PRAGMA: + case T_UNDEF: + ctx->current_directive = token; + break; + + default: + ctx->current_directive = 0; + } + } + + ctx->last_was_newline = (token == T_NEWLINE); + } + + if (ctx->current_directive && token == T_DEFINED) + ctx->last_was_defined = true; + + func_state = ctx->current_directive ? &ctx->directive_func : &ctx->text_func; + + TRACE("Parsing token %d%s, line %d, in directive %d, state %#x, string %s.\n", + token, ctx->lookahead_token ? " (lookahead)" : "", lloc->line, + ctx->current_directive, func_state->state, debugstr_a(text)); + + ctx->lookahead_token = 0; + + switch (ctx->current_directive) + { + case T_ELIF: + case T_ELSE: + case T_ENDIF: + case T_IF: + case T_IFDEF: + case T_IFNDEF: + break; + + default: + if (!preproc_is_writing(ctx)) + continue; + } + + if (ctx->current_directive == T_PRAGMA) + { + /* Print all tokens verbatim. */ + if (token == T_PRAGMA) + vkd3d_string_buffer_printf(&ctx->buffer, "#pragma "); + else + vkd3d_string_buffer_printf(&ctx->buffer, "%s", text); + continue; + } + + switch (func_state->state) + { + case STATE_NONE: + { + struct preproc_macro *macro; + + if (token == T_CONCAT && should_concat(ctx)) + { + while (ctx->buffer.content_size + && strchr(" \t\r\n", ctx->buffer.buffer[ctx->buffer.content_size - 1])) + --ctx->buffer.content_size; + break; + } + + /* Stringification, however, is only done for function-like + * macro bodies. 
*/ + if (token == T_HASHSTRING && (macro = preproc_get_top_macro(ctx)) && macro->arg_count) + { + const struct preproc_text *expansion; + const char *p = text + 1; + unsigned int i; + + if (ctx->current_directive) + return return_token(token, lval, text); + + while (*p == ' ' || *p == '\t') + ++p; + + vkd3d_string_buffer_printf(&ctx->buffer, "\""); + if ((expansion = find_arg_expansion(ctx, p))) + { + for (i = 0; i < expansion->text.content_size; ++i) + { + char c = expansion->text.buffer[i]; + + if (c == '\\' || c == '"') + vkd3d_string_buffer_printf(&ctx->buffer, "\\"); + vkd3d_string_buffer_printf(&ctx->buffer, "%c", c); + } + } + else + { + vkd3d_string_buffer_printf(&ctx->buffer, "%s", p); + } + vkd3d_string_buffer_printf(&ctx->buffer, "\""); + break; + } + + if (token == T_IDENTIFIER || token == T_IDENTIFIER_PAREN) + { + const struct preproc_text *expansion; + struct preproc_macro *macro; + + switch (ctx->current_directive) + { + case T_DEFINE: + case T_IFDEF: + case T_IFNDEF: + case T_UNDEF: + /* Return identifiers verbatim. */ + return return_token(token, lval, text); + + case T_IF: + case T_ELIF: + /* Return identifiers verbatim only if they're the + * argument to "defined". */ + if (ctx->last_was_defined) + { + ctx->last_was_defined = false; + return return_token(token, lval, text); + } + break; + } + + /* Otherwise, expand a macro if there is one. */ + + if ((expansion = find_arg_expansion(ctx, text))) + { + preproc_push_expansion(ctx, expansion, NULL); + continue; + } + + if ((macro = preproc_find_macro(ctx, text))) + { + if (!macro->arg_count) + { + preproc_push_expansion(ctx, &macro->body, macro); + } + else + { + func_state->state = STATE_IDENTIFIER; + func_state->macro = macro; + } + continue; + } + + if (!strcmp(text, "__FILE__")) + { + const struct preproc_file *file = preproc_get_top_file(ctx); + + /* Not the current file name, but rather the file name + * before invoking any macros.
*/ + + if (ctx->current_directive) + { + char *string; + + if (!(string = vkd3d_malloc(strlen(file->filename) + 3))) + return 0; + sprintf(string, "\"%s\"", file->filename); + lval->string = string; + return T_STRING; + } + + if (preproc_is_writing(ctx)) + vkd3d_string_buffer_printf(&ctx->buffer, "\"%s\" ", file->filename); + continue; + } + + if (!strcmp(text, "__LINE__")) + { + const struct preproc_file *file = preproc_get_top_file(ctx); + + /* Not the current line number, but rather the line + * number before invoking any macros. */ + + if (ctx->current_directive) + { + char string[13]; + + sprintf(string, "%d", file->buffer.location.line); + return return_token(T_INTEGER, lval, string); + } + + if (preproc_is_writing(ctx)) + vkd3d_string_buffer_printf(&ctx->buffer, "%d ", file->buffer.location.line); + continue; + } + } + + if (ctx->current_directive) + return return_token(token, lval, text); + + vkd3d_string_buffer_printf(&ctx->buffer, "%s ", text); + break; + } + + case STATE_IDENTIFIER: + if (token == '(') + { + struct preproc_text *first_arg = &func_state->macro->arg_values[0]; + unsigned int i; + + func_state->arg_count = 0; + func_state->paren_depth = 1; + func_state->state = STATE_ARGS; + for (i = 0; i < func_state->macro->arg_count; ++i) + func_state->macro->arg_values[i].text.content_size = 0; + + first_arg->location = *lloc; + } + else + { + const char *name = func_state->macro->name; + + ctx->lookahead_token = token; + func_state->macro = NULL; + func_state->state = STATE_NONE; + + if (ctx->current_directive) + return return_token(T_IDENTIFIER, lval, name); + + vkd3d_string_buffer_printf(&ctx->buffer, "%s ", name); + } + break; + + case STATE_ARGS: + { + struct preproc_text *current_arg = NULL; + + assert(func_state->macro->arg_count); + + if (func_state->arg_count < func_state->macro->arg_count) + current_arg = &func_state->macro->arg_values[func_state->arg_count]; + + switch (token) + { + case T_NEWLINE: + if (current_arg) + 
preproc_text_add(current_arg, " "); + break; + + case ')': + case ']': + case '}': + if (!--func_state->paren_depth) + { + if (++func_state->arg_count == func_state->macro->arg_count) + { + preproc_push_expansion(ctx, &func_state->macro->body, func_state->macro); + } + else + { + preproc_warning(ctx, lloc, VKD3D_SHADER_WARNING_PP_ARGUMENT_COUNT_MISMATCH, + "Wrong number of arguments to macro \"%s\": expected %zu, got %zu.", + func_state->macro->name, func_state->macro->arg_count, func_state->arg_count); + + if (ctx->current_directive) + return return_token(T_IDENTIFIER, lval, func_state->macro->name); + + vkd3d_string_buffer_printf(&ctx->buffer, "%s ", func_state->macro->name); + } + func_state->macro = NULL; + func_state->state = STATE_NONE; + } + else + { + if (current_arg) + preproc_text_add(current_arg, text); + } + break; + + case ',': + if (func_state->paren_depth == 1) + { + ++func_state->arg_count; + if (current_arg) + current_arg->location = *lloc; + } + else if (current_arg) + { + preproc_text_add(current_arg, text); + } + break; + + case '(': + case '[': + case '{': + ++func_state->paren_depth; + /* fall through */ + + default: + if (current_arg) + preproc_text_add(current_arg, text); + } + break; + } + } + } +} + +bool preproc_push_include(struct preproc_ctx *ctx, char *filename, const struct vkd3d_shader_code *code) +{ + struct preproc_file *file; + + if (!vkd3d_array_reserve((void **)&ctx->file_stack, &ctx->file_stack_size, + ctx->file_count + 1, sizeof(*ctx->file_stack))) + return false; + file = &ctx->file_stack[ctx->file_count++]; + memset(file, 0, sizeof(*file)); + file->code = *code; + file->filename = filename; + file->buffer.lexer_buffer = yy_scan_bytes(code->code, code->size, ctx->scanner); + file->buffer.location.source_name = file->filename; + file->buffer.location.line = 1; + file->buffer.location.column = 1; + TRACE("File stack size is now %zu.\n", ctx->file_count); + ctx->last_was_newline = true; + return true; +} + +static int 
preproc_macro_compare(const void *key, const struct rb_entry *entry) +{ + const struct preproc_macro *macro = RB_ENTRY_VALUE(entry, struct preproc_macro, entry); + const char *name = key; + + return strcmp(name, macro->name); +} + +static void preproc_macro_rb_free(struct rb_entry *entry, void *ctx) +{ + preproc_free_macro(RB_ENTRY_VALUE(entry, struct preproc_macro, entry)); +} + +int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + static const struct vkd3d_shader_preprocess_info default_preprocess_info = {0}; + struct preproc_ctx ctx = {0}; + char *source_name = NULL; + void *output_code; + unsigned int i; + + vkd3d_string_buffer_init(&ctx.buffer); + rb_init(&ctx.macros, preproc_macro_compare); + if (!(ctx.preprocess_info = vkd3d_find_struct(compile_info->next, PREPROCESS_INFO))) + ctx.preprocess_info = &default_preprocess_info; + ctx.message_context = message_context; + + if (!(source_name = vkd3d_strdup(compile_info->source_name ? 
compile_info->source_name : ""))) + goto fail; + + for (i = 0; i < ctx.preprocess_info->macro_count; ++i) + { + const struct vkd3d_shader_location loc = {.source_name = source_name}; + struct vkd3d_string_buffer body; + char *name; + + vkd3d_string_buffer_init(&body); + vkd3d_string_buffer_printf(&body, "%s", ctx.preprocess_info->macros[i].value); + if (!(name = vkd3d_strdup(ctx.preprocess_info->macros[i].name))) + { + vkd3d_string_buffer_cleanup(&body); + goto fail; + } + if (!preproc_add_macro(&ctx, &loc, name, NULL, 0, &loc, &body)) + { + vkd3d_free(name); + vkd3d_string_buffer_cleanup(&body); + goto fail; + } + } + + yylex_init_extra(&ctx, &ctx.scanner); + if (!preproc_push_include(&ctx, source_name, &compile_info->source)) + { + yylex_destroy(ctx.scanner); + goto fail; + } + + preproc_yyparse(ctx.scanner, &ctx); + + switch (ctx.text_func.state) + { + case STATE_NONE: + break; + + case STATE_ARGS: + { + const struct vkd3d_shader_location loc = {.source_name = source_name}; + + preproc_warning(&ctx, &loc, VKD3D_SHADER_WARNING_PP_UNTERMINATED_MACRO, + "Unterminated macro invocation."); + } + /* fall through */ + + case STATE_IDENTIFIER: + if (preproc_is_writing(&ctx)) + vkd3d_string_buffer_printf(&ctx.buffer, "%s ", ctx.text_func.macro->name); + break; + } + + while (ctx.file_count) + preproc_pop_buffer(&ctx); + yylex_destroy(ctx.scanner); + + rb_destroy(&ctx.macros, preproc_macro_rb_free, NULL); + vkd3d_free(ctx.file_stack); + vkd3d_free(ctx.expansion_stack); + + if (ctx.error) + { + WARN("Failed to preprocess.\n"); + vkd3d_string_buffer_cleanup(&ctx.buffer); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!(output_code = vkd3d_malloc(ctx.buffer.content_size))) + { + vkd3d_string_buffer_cleanup(&ctx.buffer); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + memcpy(output_code, ctx.buffer.buffer, ctx.buffer.content_size); + out->size = ctx.buffer.content_size; + out->code = output_code; + vkd3d_string_buffer_trace(&ctx.buffer); + 
vkd3d_string_buffer_cleanup(&ctx.buffer); + return VKD3D_OK; + +fail: + rb_destroy(&ctx.macros, preproc_macro_rb_free, NULL); + vkd3d_free(source_name); + vkd3d_string_buffer_cleanup(&ctx.buffer); + return VKD3D_ERROR_OUT_OF_MEMORY; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y new file mode 100644 index 00000000000..3f02ac03612 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y @@ -0,0 +1,829 @@ +/* + * HLSL preprocessor + * + * Copyright 2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +%code requires +{ + +#include "vkd3d_shader_private.h" +#include "preproc.h" +#include <stdio.h> +#include <sys/stat.h> + +#define PREPROC_YYLTYPE struct vkd3d_shader_location + +struct parse_arg_names +{ + char **args; + size_t count; +}; + +} + +%code provides +{ + +int preproc_yylex(PREPROC_YYSTYPE *yylval_param, PREPROC_YYLTYPE *yylloc_param, void *scanner); + +} + +%code +{ + +#define YYLLOC_DEFAULT(cur, rhs, n) (cur) = YYRHSLOC(rhs, !!n) + +#ifndef S_ISREG +# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif + +static void preproc_error(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *format, ...)
+{ + va_list args; + + va_start(args, format); + vkd3d_shader_verror(ctx->message_context, loc, error, format, args); + va_end(args); + ctx->error = true; +} + +void preproc_warning(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *format, ...) +{ + va_list args; + + va_start(args, format); + vkd3d_shader_vwarning(ctx->message_context, loc, error, format, args); + va_end(args); +} + +static void yyerror(const YYLTYPE *loc, void *scanner, struct preproc_ctx *ctx, const char *string) +{ + preproc_error(ctx, loc, VKD3D_SHADER_ERROR_PP_INVALID_SYNTAX, "%s", string); +} + +struct preproc_macro *preproc_find_macro(struct preproc_ctx *ctx, const char *name) +{ + struct rb_entry *entry; + + if ((entry = rb_get(&ctx->macros, name))) + return RB_ENTRY_VALUE(entry, struct preproc_macro, entry); + return NULL; +} + +bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, char *name, char **arg_names, + size_t arg_count, const struct vkd3d_shader_location *body_loc, struct vkd3d_string_buffer *body) +{ + struct preproc_macro *macro; + unsigned int i; + int ret; + + if ((macro = preproc_find_macro(ctx, name))) + { + preproc_warning(ctx, loc, VKD3D_SHADER_WARNING_PP_ALREADY_DEFINED, "Redefinition of %s.", name); + rb_remove(&ctx->macros, &macro->entry); + preproc_free_macro(macro); + } + + TRACE("Defining new macro %s with %zu arguments.\n", debugstr_a(name), arg_count); + + if (!(macro = vkd3d_malloc(sizeof(*macro)))) + return false; + macro->name = name; + macro->arg_names = arg_names; + macro->arg_count = arg_count; + macro->arg_values = NULL; + if (arg_count && !(macro->arg_values = vkd3d_calloc(arg_count, sizeof(*macro->arg_values)))) + { + vkd3d_free(macro); + return false; + } + for (i = 0; i < arg_count; ++i) + vkd3d_string_buffer_init(&macro->arg_values[i].text); + macro->body.text = *body; + macro->body.location = *body_loc; + ret = rb_put(&ctx->macros, name, &macro->entry); + 
assert(!ret); + return true; +} + +void preproc_free_macro(struct preproc_macro *macro) +{ + unsigned int i; + + vkd3d_free(macro->name); + for (i = 0; i < macro->arg_count; ++i) + { + vkd3d_string_buffer_cleanup(&macro->arg_values[i].text); + vkd3d_free(macro->arg_names[i]); + } + vkd3d_free(macro->arg_names); + vkd3d_free(macro->arg_values); + vkd3d_string_buffer_cleanup(&macro->body.text); + vkd3d_free(macro); +} + +static bool preproc_was_writing(struct preproc_ctx *ctx) +{ + const struct preproc_file *file = preproc_get_top_file(ctx); + + /* This applies across files, since we can't #include anyway if we weren't + * writing. */ + if (file->if_count < 2) + return true; + return file->if_stack[file->if_count - 2].current_true; +} + +static bool preproc_push_if(struct preproc_ctx *ctx, bool condition) +{ + struct preproc_file *file = preproc_get_top_file(ctx); + struct preproc_if_state *state; + + if (!vkd3d_array_reserve((void **)&file->if_stack, &file->if_stack_size, + file->if_count + 1, sizeof(*file->if_stack))) + return false; + state = &file->if_stack[file->if_count++]; + state->current_true = condition && preproc_was_writing(ctx); + state->seen_true = condition; + state->seen_else = false; + return true; +} + +static int char_to_int(char c) +{ + if ('0' <= c && c <= '9') + return c - '0'; + if ('A' <= c && c <= 'F') + return c - 'A' + 10; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + return -1; +} + +static uint32_t preproc_parse_integer(const char *s) +{ + uint32_t base = 10, ret = 0; + int digit; + + if (*s == '0') + { + base = 8; + ++s; + if (*s == 'x' || *s == 'X') + { + base = 16; + ++s; + } + } + + while ((digit = char_to_int(*s++)) >= 0) + ret = ret * base + (uint32_t)digit; + return ret; +} + +static int default_open_include(const char *filename, bool local, + const char *parent_data, void *context, struct vkd3d_shader_code *out) +{ + uint8_t *data, *new_data; + size_t size = 4096; + struct stat st; + size_t pos = 0; + size_t ret; + FILE *f; + + if 
(!(f = fopen(filename, "rb"))) + { + ERR("Unable to open %s for reading.\n", debugstr_a(filename)); + return VKD3D_ERROR; + } + + if (fstat(fileno(f), &st) == -1) + { + ERR("Could not stat file %s.\n", debugstr_a(filename)); + fclose(f); + return VKD3D_ERROR; + } + + if (S_ISREG(st.st_mode)) + size = st.st_size; + + if (!(data = vkd3d_malloc(size))) + { + fclose(f); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (;;) + { + if (pos >= size) + { + if (size > SIZE_MAX / 2 || !(new_data = vkd3d_realloc(data, size * 2))) + { + vkd3d_free(data); + fclose(f); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + data = new_data; + size *= 2; + } + + if (!(ret = fread(&data[pos], 1, size - pos, f))) + break; + pos += ret; + } + + if (!feof(f)) + { + vkd3d_free(data); + return VKD3D_ERROR; + } + + fclose(f); + + out->code = data; + out->size = pos; + + return VKD3D_OK; +} + +static void default_close_include(const struct vkd3d_shader_code *code, void *context) +{ + vkd3d_free((void *)code->code); +} + +void preproc_close_include(struct preproc_ctx *ctx, const struct vkd3d_shader_code *code) +{ + PFN_vkd3d_shader_close_include close_include = ctx->preprocess_info->pfn_close_include; + + if (!close_include) + close_include = default_close_include; + + close_include(code, ctx->preprocess_info->include_context); +} + +static const void *get_parent_data(struct preproc_ctx *ctx) +{ + if (ctx->file_count == 1) + return NULL; + return preproc_get_top_file(ctx)->code.code; +} + +static void free_parse_arg_names(struct parse_arg_names *args) +{ + unsigned int i; + + for (i = 0; i < args->count; ++i) + vkd3d_free(args->args[i]); + vkd3d_free(args->args); +} + +} + +%define api.prefix {preproc_yy} +%define api.pure full +%define parse.error verbose +%expect 0 +%locations +%lex-param {yyscan_t scanner} +%parse-param {void *scanner} +%parse-param {struct preproc_ctx *ctx} + +%union +{ + char *string; + const char *const_string; + uint32_t integer; + struct vkd3d_string_buffer string_buffer; + 
struct parse_arg_names arg_names; +} + +%token <string> T_HASHSTRING +%token <string> T_IDENTIFIER +%token <string> T_IDENTIFIER_PAREN +%token <string> T_INTEGER +%token <string> T_STRING +%token <string> T_TEXT + +%token T_NEWLINE + +%token T_DEFINE "#define" +%token T_ERROR "#error" +%token T_ELIF "#elif" +%token T_ELSE "#else" +%token T_ENDIF "#endif" +%token T_IF "#if" +%token T_IFDEF "#ifdef" +%token T_IFNDEF "#ifndef" +%token T_INCLUDE "#include" +%token T_LINE "#line" +%token T_PRAGMA "#pragma" +%token T_UNDEF "#undef" + +%token T_CONCAT "##" + +%token T_LE "<=" +%token T_GE ">=" +%token T_EQ "==" +%token T_NE "!=" +%token T_AND "&&" +%token T_OR "||" +%token T_DEFINED "defined" + +%type <integer> primary_expr +%type <integer> unary_expr +%type <integer> mul_expr +%type <integer> add_expr +%type <integer> ineq_expr +%type <integer> eq_expr +%type <integer> bitand_expr +%type <integer> bitxor_expr +%type <integer> bitor_expr +%type <integer> logicand_expr +%type <integer> logicor_expr +%type <integer> expr +%type <string> body_token +%type <const_string> body_token_const +%type <string_buffer> body_text +%type <arg_names> identifier_list + +%% + +shader_text + : %empty + | shader_text directive + { + vkd3d_string_buffer_printf(&ctx->buffer, "\n"); + } + +identifier_list + : T_IDENTIFIER + { + if (!($$.args = vkd3d_malloc(sizeof(*$$.args)))) + YYABORT; + $$.args[0] = $1; + $$.count = 1; + } + | identifier_list ',' T_IDENTIFIER + { + char **new_array; + + if (!(new_array = vkd3d_realloc($1.args, ($1.count + 1) * sizeof(*$$.args)))) + { + free_parse_arg_names(&$1); + YYABORT; + } + $$.args = new_array; + $$.count = $1.count + 1; + $$.args[$1.count] = $3; + } + +body_text + : %empty + { + vkd3d_string_buffer_init(&$$); + } + | body_text body_token + { + if (vkd3d_string_buffer_printf(&$$, "%s ", $2) < 0) + { + vkd3d_free($2); + YYABORT; + } + vkd3d_free($2); + } + | body_text body_token_const + { + if (vkd3d_string_buffer_printf(&$$, "%s ", $2) < 0) + YYABORT; + } + +body_token + : T_HASHSTRING + | T_IDENTIFIER + | T_IDENTIFIER_PAREN + | T_INTEGER + | T_TEXT + +body_token_const + : '(' + { + $$ = "("; + } + | ')' + { + $$ = ")"; + } + | '[' + { + $$ = "["; + } + | ']' + { + $$ = "]"; 
+ } + | '{' + { + $$ = "{"; + } + | '}' + { + $$ = "}"; + } + | ',' + { + $$ = ","; + } + | '+' + { + $$ = "+"; + } + | '-' + { + $$ = "-"; + } + | '!' + { + $$ = "!"; + } + | '*' + { + $$ = "*"; + } + | '/' + { + $$ = "/"; + } + | '<' + { + $$ = "<"; + } + | '>' + { + $$ = ">"; + } + | '&' + { + $$ = "&"; + } + | '|' + { + $$ = "|"; + } + | '^' + { + $$ = "^"; + } + | '?' + { + $$ = "?"; + } + | ':' + { + $$ = ":"; + } + | T_CONCAT + { + $$ = "##"; + } + | T_LE + { + $$ = "<="; + } + | T_GE + { + $$ = ">="; + } + | T_EQ + { + $$ = "=="; + } + | T_NE + { + $$ = "!="; + } + | T_AND + { + $$ = "&&"; + } + | T_OR + { + $$ = "||"; + } + | T_DEFINED + { + $$ = "defined"; + } + +directive + : T_DEFINE T_IDENTIFIER body_text T_NEWLINE + { + if (!preproc_add_macro(ctx, &@$, $2, NULL, 0, &@3, &$3)) + { + vkd3d_free($2); + vkd3d_string_buffer_cleanup(&$3); + YYABORT; + } + } + | T_DEFINE T_IDENTIFIER_PAREN '(' identifier_list ')' body_text T_NEWLINE + { + if (!preproc_add_macro(ctx, &@6, $2, $4.args, $4.count, &@6, &$6)) + { + vkd3d_free($2); + free_parse_arg_names(&$4); + vkd3d_string_buffer_cleanup(&$6); + YYABORT; + } + } + | T_UNDEF T_IDENTIFIER T_NEWLINE + { + struct preproc_macro *macro; + + if ((macro = preproc_find_macro(ctx, $2))) + { + TRACE("Removing macro definition %s.\n", debugstr_a($2)); + rb_remove(&ctx->macros, &macro->entry); + preproc_free_macro(macro); + } + vkd3d_free($2); + } + | T_IF expr T_NEWLINE + { + if (!preproc_push_if(ctx, !!$2)) + YYABORT; + } + | T_IFDEF T_IDENTIFIER T_NEWLINE + { + preproc_push_if(ctx, !!preproc_find_macro(ctx, $2)); + vkd3d_free($2); + } + | T_IFNDEF T_IDENTIFIER T_NEWLINE + { + preproc_push_if(ctx, !preproc_find_macro(ctx, $2)); + vkd3d_free($2); + } + | T_ELIF expr T_NEWLINE + { + const struct preproc_file *file = preproc_get_top_file(ctx); + + if (file->if_count) + { + struct preproc_if_state *state = &file->if_stack[file->if_count - 1]; + + if (state->seen_else) + { + preproc_warning(ctx, &@$, 
VKD3D_SHADER_WARNING_PP_INVALID_DIRECTIVE, "Ignoring #elif after #else."); + } + else + { + state->current_true = $2 && !state->seen_true && preproc_was_writing(ctx); + state->seen_true = $2 || state->seen_true; + } + } + else + { + preproc_warning(ctx, &@$, VKD3D_SHADER_WARNING_PP_INVALID_DIRECTIVE, + "Ignoring #elif without prior #if."); + } + } + | T_ELSE T_NEWLINE + { + const struct preproc_file *file = preproc_get_top_file(ctx); + + if (file->if_count) + { + struct preproc_if_state *state = &file->if_stack[file->if_count - 1]; + + if (state->seen_else) + { + preproc_warning(ctx, &@$, VKD3D_SHADER_WARNING_PP_INVALID_DIRECTIVE, "Ignoring #else after #else."); + } + else + { + state->current_true = !state->seen_true && preproc_was_writing(ctx); + state->seen_else = true; + } + } + else + { + preproc_warning(ctx, &@$, VKD3D_SHADER_WARNING_PP_INVALID_DIRECTIVE, + "Ignoring #else without prior #if."); + } + } + | T_ENDIF T_NEWLINE + { + struct preproc_file *file = preproc_get_top_file(ctx); + + if (file->if_count) + --file->if_count; + else + preproc_warning(ctx, &@$, VKD3D_SHADER_WARNING_PP_INVALID_DIRECTIVE, + "Ignoring #endif without prior #if."); + } + | T_ERROR T_NEWLINE + { + preproc_error(ctx, &@$, VKD3D_SHADER_ERROR_PP_ERROR_DIRECTIVE, "Error directive."); + } + | T_ERROR T_STRING T_NEWLINE + { + preproc_error(ctx, &@$, VKD3D_SHADER_ERROR_PP_ERROR_DIRECTIVE, "Error directive: %s", $2); + vkd3d_free($2); + } + | T_INCLUDE T_STRING T_NEWLINE + { + PFN_vkd3d_shader_open_include open_include = ctx->preprocess_info->pfn_open_include; + struct vkd3d_shader_code code; + char *filename; + int result; + + if (!(filename = vkd3d_malloc(strlen($2) - 1))) + YYABORT; + + if (!open_include) + open_include = default_open_include; + + memcpy(filename, $2 + 1, strlen($2) - 2); + filename[strlen($2) - 2] = 0; + + if (!(result = open_include(filename, $2[0] == '"', get_parent_data(ctx), + ctx->preprocess_info->include_context, &code))) + { + if (!preproc_push_include(ctx, 
filename, &code)) + { + preproc_close_include(ctx, &code); + vkd3d_free(filename); + } + } + else + { + preproc_error(ctx, &@$, VKD3D_SHADER_ERROR_PP_INCLUDE_FAILED, "Failed to open %s.", $2); + vkd3d_free(filename); + } + vkd3d_free($2); + } + | T_LINE T_INTEGER T_NEWLINE + { + FIXME("#line directive.\n"); + vkd3d_free($2); + } + | T_LINE T_INTEGER T_STRING T_NEWLINE + { + FIXME("#line directive.\n"); + vkd3d_free($2); + vkd3d_free($3); + } + +primary_expr + : T_INTEGER + { + $$ = preproc_parse_integer($1); + vkd3d_free($1); + } + | T_IDENTIFIER + { + $$ = 0; + vkd3d_free($1); + } + | T_DEFINED T_IDENTIFIER + { + $$ = !!preproc_find_macro(ctx, $2); + vkd3d_free($2); + } + | T_DEFINED '(' T_IDENTIFIER ')' + { + $$ = !!preproc_find_macro(ctx, $3); + vkd3d_free($3); + } + | '(' expr ')' + { + $$ = $2; + } + +unary_expr + : primary_expr + | '+' unary_expr + { + $$ = $2; + } + | '-' unary_expr + { + $$ = -$2; + } + | '!' unary_expr + { + $$ = !$2; + } + +mul_expr + : unary_expr + | mul_expr '*' unary_expr + { + $$ = $1 * $3; + } + | mul_expr '/' unary_expr + { + if (!$3) + { + preproc_warning(ctx, &@3, VKD3D_SHADER_WARNING_PP_DIV_BY_ZERO, "Division by zero."); + $3 = 1; + } + $$ = $1 / $3; + } + +add_expr + : mul_expr + | add_expr '+' mul_expr + { + $$ = $1 + $3; + } + | add_expr '-' mul_expr + { + $$ = $1 - $3; + } + +ineq_expr + : add_expr + | ineq_expr '<' add_expr + { + $$ = $1 < $3; + } + | ineq_expr '>' add_expr + { + $$ = $1 > $3; + } + | ineq_expr T_LE add_expr + { + $$ = $1 <= $3; + } + | ineq_expr T_GE add_expr + { + $$ = $1 >= $3; + } + +eq_expr + : ineq_expr + | eq_expr T_EQ ineq_expr + { + $$ = $1 == $3; + } + | eq_expr T_NE ineq_expr + { + $$ = $1 != $3; + } + +bitand_expr + : eq_expr + | bitand_expr '&' eq_expr + { + $$ = $1 & $3; + } + +bitxor_expr + : bitand_expr + | bitxor_expr '^' bitand_expr + { + $$ = $1 ^ $3; + } + +bitor_expr + : bitxor_expr + | bitor_expr '|' bitxor_expr + { + $$ = $1 | $3; + } + +logicand_expr + : bitor_expr + | logicand_expr 
T_AND bitor_expr + { + $$ = $1 && $3; + } + +logicor_expr + : logicand_expr + | logicor_expr T_OR logicand_expr + { + $$ = $1 || $3; + } + +expr + : logicor_expr + | expr '?' logicor_expr ':' logicor_expr + { + $$ = $1 ? $3 : $5; + } diff --git a/libs/vkd3d/libs/vkd3d-shader/sm4.h b/libs/vkd3d/libs/vkd3d-shader/sm4.h new file mode 100644 index 00000000000..62bb4d12b8a --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/sm4.h @@ -0,0 +1,548 @@ +/* + * Copyright 2009 Henri Verbeet for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SM4_H +#define __VKD3D_SM4_H + +#define VKD3D_SM4_PS 0x0000u +#define VKD3D_SM4_VS 0x0001u +#define VKD3D_SM4_GS 0x0002u +#define VKD3D_SM5_HS 0x0003u +#define VKD3D_SM5_DS 0x0004u +#define VKD3D_SM5_CS 0x0005u +#define VKD3D_SM4_LIB 0xfff0u + +#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) + +#define VKD3D_SM4_MODIFIER_MASK 0x3fu + +#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) + +#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 +#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) +#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 +#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) +#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 +#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 +#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 +#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) + +#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 +#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 +#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) + +#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 +#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) + 
+#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 +#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) + +#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 +#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) + +#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 +#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) + +#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 +#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) + +#define VKD3D_SM5_PRECISE_SHIFT 19 +#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) + +#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 +#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) + +#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 +#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu + +#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 +#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) + +#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 +#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) + +#define VKD3D_SM5_TESSELLATOR_SHIFT 11 +#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) + +#define VKD3D_SM4_OPCODE_MASK 0xff + +#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) + +#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu + +#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 +#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + +#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 +#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) + +#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 +#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) + +#define VKD3D_SM4_ADDRESSING_SHIFT2 28 +#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) + +#define VKD3D_SM4_ADDRESSING_SHIFT1 25 +#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) + +#define 
VKD3D_SM4_ADDRESSING_SHIFT0 22 +#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) + +#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 +#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) + +#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 +#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) + +#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 +#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) + +#define VKD3D_SM4_DIMENSION_SHIFT 0 +#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) + +#define VKD3D_SM4_WRITEMASK_SHIFT 4 +#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) + +#define VKD3D_SM4_SWIZZLE_SHIFT 4 +#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) + +#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 +#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 + +#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 + +#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) + +#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) + +/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. 
*/ +#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 + +enum vkd3d_sm4_opcode +{ + VKD3D_SM4_OP_ADD = 0x00, + VKD3D_SM4_OP_AND = 0x01, + VKD3D_SM4_OP_BREAK = 0x02, + VKD3D_SM4_OP_BREAKC = 0x03, + VKD3D_SM4_OP_CASE = 0x06, + VKD3D_SM4_OP_CONTINUE = 0x07, + VKD3D_SM4_OP_CONTINUEC = 0x08, + VKD3D_SM4_OP_CUT = 0x09, + VKD3D_SM4_OP_DEFAULT = 0x0a, + VKD3D_SM4_OP_DERIV_RTX = 0x0b, + VKD3D_SM4_OP_DERIV_RTY = 0x0c, + VKD3D_SM4_OP_DISCARD = 0x0d, + VKD3D_SM4_OP_DIV = 0x0e, + VKD3D_SM4_OP_DP2 = 0x0f, + VKD3D_SM4_OP_DP3 = 0x10, + VKD3D_SM4_OP_DP4 = 0x11, + VKD3D_SM4_OP_ELSE = 0x12, + VKD3D_SM4_OP_EMIT = 0x13, + VKD3D_SM4_OP_ENDIF = 0x15, + VKD3D_SM4_OP_ENDLOOP = 0x16, + VKD3D_SM4_OP_ENDSWITCH = 0x17, + VKD3D_SM4_OP_EQ = 0x18, + VKD3D_SM4_OP_EXP = 0x19, + VKD3D_SM4_OP_FRC = 0x1a, + VKD3D_SM4_OP_FTOI = 0x1b, + VKD3D_SM4_OP_FTOU = 0x1c, + VKD3D_SM4_OP_GE = 0x1d, + VKD3D_SM4_OP_IADD = 0x1e, + VKD3D_SM4_OP_IF = 0x1f, + VKD3D_SM4_OP_IEQ = 0x20, + VKD3D_SM4_OP_IGE = 0x21, + VKD3D_SM4_OP_ILT = 0x22, + VKD3D_SM4_OP_IMAD = 0x23, + VKD3D_SM4_OP_IMAX = 0x24, + VKD3D_SM4_OP_IMIN = 0x25, + VKD3D_SM4_OP_IMUL = 0x26, + VKD3D_SM4_OP_INE = 0x27, + VKD3D_SM4_OP_INEG = 0x28, + VKD3D_SM4_OP_ISHL = 0x29, + VKD3D_SM4_OP_ISHR = 0x2a, + VKD3D_SM4_OP_ITOF = 0x2b, + VKD3D_SM4_OP_LABEL = 0x2c, + VKD3D_SM4_OP_LD = 0x2d, + VKD3D_SM4_OP_LD2DMS = 0x2e, + VKD3D_SM4_OP_LOG = 0x2f, + VKD3D_SM4_OP_LOOP = 0x30, + VKD3D_SM4_OP_LT = 0x31, + VKD3D_SM4_OP_MAD = 0x32, + VKD3D_SM4_OP_MIN = 0x33, + VKD3D_SM4_OP_MAX = 0x34, + VKD3D_SM4_OP_SHADER_DATA = 0x35, + VKD3D_SM4_OP_MOV = 0x36, + VKD3D_SM4_OP_MOVC = 0x37, + VKD3D_SM4_OP_MUL = 0x38, + VKD3D_SM4_OP_NE = 0x39, + VKD3D_SM4_OP_NOP = 0x3a, + VKD3D_SM4_OP_NOT = 0x3b, + VKD3D_SM4_OP_OR = 0x3c, + VKD3D_SM4_OP_RESINFO = 0x3d, + VKD3D_SM4_OP_RET = 0x3e, + VKD3D_SM4_OP_RETC = 0x3f, + VKD3D_SM4_OP_ROUND_NE = 0x40, + VKD3D_SM4_OP_ROUND_NI = 0x41, + VKD3D_SM4_OP_ROUND_PI = 0x42, + VKD3D_SM4_OP_ROUND_Z = 0x43, + VKD3D_SM4_OP_RSQ = 0x44, + VKD3D_SM4_OP_SAMPLE = 0x45, + 
VKD3D_SM4_OP_SAMPLE_C = 0x46, + VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, + VKD3D_SM4_OP_SAMPLE_LOD = 0x48, + VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, + VKD3D_SM4_OP_SAMPLE_B = 0x4a, + VKD3D_SM4_OP_SQRT = 0x4b, + VKD3D_SM4_OP_SWITCH = 0x4c, + VKD3D_SM4_OP_SINCOS = 0x4d, + VKD3D_SM4_OP_UDIV = 0x4e, + VKD3D_SM4_OP_ULT = 0x4f, + VKD3D_SM4_OP_UGE = 0x50, + VKD3D_SM4_OP_UMUL = 0x51, + VKD3D_SM4_OP_UMAX = 0x53, + VKD3D_SM4_OP_UMIN = 0x54, + VKD3D_SM4_OP_USHR = 0x55, + VKD3D_SM4_OP_UTOF = 0x56, + VKD3D_SM4_OP_XOR = 0x57, + VKD3D_SM4_OP_DCL_RESOURCE = 0x58, + VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, + VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, + VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, + VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, + VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, + VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, + VKD3D_SM4_OP_DCL_INPUT = 0x5f, + VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, + VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, + VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, + VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, + VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, + VKD3D_SM4_OP_DCL_OUTPUT = 0x65, + VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, + VKD3D_SM4_OP_DCL_TEMPS = 0x68, + VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, + VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, + VKD3D_SM4_OP_LOD = 0x6c, + VKD3D_SM4_OP_GATHER4 = 0x6d, + VKD3D_SM4_OP_SAMPLE_POS = 0x6e, + VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, + VKD3D_SM5_OP_HS_DECLS = 0x71, + VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, + VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, + VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, + VKD3D_SM5_OP_EMIT_STREAM = 0x75, + VKD3D_SM5_OP_CUT_STREAM = 0x76, + VKD3D_SM5_OP_FCALL = 0x78, + VKD3D_SM5_OP_BUFINFO = 0x79, + VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, + VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, + VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, + VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, + VKD3D_SM5_OP_GATHER4_C = 0x7e, + VKD3D_SM5_OP_GATHER4_PO = 0x7f, + VKD3D_SM5_OP_GATHER4_PO_C = 0x80, + VKD3D_SM5_OP_RCP = 0x81, + VKD3D_SM5_OP_F32TOF16 = 0x82, + VKD3D_SM5_OP_F16TOF32 = 0x83, + VKD3D_SM5_OP_COUNTBITS = 0x86, + 
VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, + VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, + VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, + VKD3D_SM5_OP_UBFE = 0x8a, + VKD3D_SM5_OP_IBFE = 0x8b, + VKD3D_SM5_OP_BFI = 0x8c, + VKD3D_SM5_OP_BFREV = 0x8d, + VKD3D_SM5_OP_SWAPC = 0x8e, + VKD3D_SM5_OP_DCL_STREAM = 0x8f, + VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, + VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, + VKD3D_SM5_OP_DCL_INTERFACE = 0x92, + VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, + VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, + VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, + VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, + VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, + VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, + VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, + VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, + VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, + VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, + VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, + VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, + VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, + VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, + VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, + VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, + VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, + VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, + VKD3D_SM5_OP_LD_RAW = 0xa5, + VKD3D_SM5_OP_STORE_RAW = 0xa6, + VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, + VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, + VKD3D_SM5_OP_ATOMIC_AND = 0xa9, + VKD3D_SM5_OP_ATOMIC_OR = 0xaa, + VKD3D_SM5_OP_ATOMIC_XOR = 0xab, + VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, + VKD3D_SM5_OP_ATOMIC_IADD = 0xad, + VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, + VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, + VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, + VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, + VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, + VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, + VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, + VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, + VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, + VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, + VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, + VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, + 
VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, + VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, + VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, + VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, + VKD3D_SM5_OP_SYNC = 0xbe, + VKD3D_SM5_OP_DADD = 0xbf, + VKD3D_SM5_OP_DMAX = 0xc0, + VKD3D_SM5_OP_DMIN = 0xc1, + VKD3D_SM5_OP_DMUL = 0xc2, + VKD3D_SM5_OP_DEQ = 0xc3, + VKD3D_SM5_OP_DGE = 0xc4, + VKD3D_SM5_OP_DLT = 0xc5, + VKD3D_SM5_OP_DNE = 0xc6, + VKD3D_SM5_OP_DMOV = 0xc7, + VKD3D_SM5_OP_DMOVC = 0xc8, + VKD3D_SM5_OP_DTOF = 0xc9, + VKD3D_SM5_OP_FTOD = 0xca, + VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, + VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, + VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, + VKD3D_SM5_OP_DDIV = 0xd2, + VKD3D_SM5_OP_DFMA = 0xd3, + VKD3D_SM5_OP_DRCP = 0xd4, + VKD3D_SM5_OP_DTOI = 0xd6, + VKD3D_SM5_OP_DTOU = 0xd7, + VKD3D_SM5_OP_ITOD = 0xd8, + VKD3D_SM5_OP_UTOD = 0xd9, + VKD3D_SM5_OP_GATHER4_S = 0xdb, + VKD3D_SM5_OP_GATHER4_C_S = 0xdc, + VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, + VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, + VKD3D_SM5_OP_LD_S = 0xdf, + VKD3D_SM5_OP_LD2DMS_S = 0xe0, + VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, + VKD3D_SM5_OP_LD_RAW_S = 0xe2, + VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, + VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, + VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, + VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, + VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, + VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, + VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, + VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, +}; + +enum vkd3d_sm4_instruction_modifier +{ + VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, + VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, + VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, +}; + +enum vkd3d_sm4_register_type +{ + VKD3D_SM4_RT_TEMP = 0x00, + VKD3D_SM4_RT_INPUT = 0x01, + VKD3D_SM4_RT_OUTPUT = 0x02, + VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, + VKD3D_SM4_RT_IMMCONST = 0x04, + VKD3D_SM4_RT_IMMCONST64 = 0x05, + VKD3D_SM4_RT_SAMPLER = 0x06, + VKD3D_SM4_RT_RESOURCE = 0x07, + VKD3D_SM4_RT_CONSTBUFFER = 0x08, + VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, + VKD3D_SM4_RT_PRIMID = 0x0b, + VKD3D_SM4_RT_DEPTHOUT = 
0x0c, + VKD3D_SM4_RT_NULL = 0x0d, + VKD3D_SM4_RT_RASTERIZER = 0x0e, + VKD3D_SM4_RT_OMASK = 0x0f, + VKD3D_SM5_RT_STREAM = 0x10, + VKD3D_SM5_RT_FUNCTION_BODY = 0x11, + VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, + VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, + VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, + VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, + VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, + VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, + VKD3D_SM5_RT_UAV = 0x1e, + VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, + VKD3D_SM5_RT_THREAD_ID = 0x20, + VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, + VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, + VKD3D_SM5_RT_COVERAGE = 0x23, + VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, + VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, + VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, + VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, + VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, +}; + +enum vkd3d_sm4_extended_operand_type +{ + VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, + VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, +}; + +enum vkd3d_sm4_register_modifier +{ + VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, + VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, + VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, + VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, +}; + +enum vkd3d_sm4_register_precision +{ + VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, + VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, + VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, +}; + +enum vkd3d_sm4_output_primitive_type +{ + VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, + VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, + VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, +}; + +enum vkd3d_sm4_input_primitive_type +{ + VKD3D_SM4_INPUT_PT_POINT = 0x01, + VKD3D_SM4_INPUT_PT_LINE = 0x02, + VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, + VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, + VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, + VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, 
+ VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, + VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, + VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, + VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, + VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, + VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, + VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, + VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, + VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, + VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, + VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, + VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, + VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, + VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, + VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, + VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, + VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, + VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, + VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, + VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, + VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, + VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, + VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, + VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, + VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, + VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, + VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, + VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, + VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, + VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, + VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, +}; + +enum vkd3d_sm4_swizzle_type +{ + VKD3D_SM4_SWIZZLE_NONE = 0x0, + VKD3D_SM4_SWIZZLE_VEC4 = 0x1, + VKD3D_SM4_SWIZZLE_SCALAR = 0x2, +}; + +enum vkd3d_sm4_dimension +{ + VKD3D_SM4_DIMENSION_NONE = 0x0, + VKD3D_SM4_DIMENSION_SCALAR = 0x1, + VKD3D_SM4_DIMENSION_VEC4 = 0x2, +}; + +enum vkd3d_sm4_resource_type +{ + VKD3D_SM4_RESOURCE_BUFFER = 0x1, + VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, + VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, + VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, + VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, + VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, + VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, + VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, + VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, + VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, + VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, + VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, +}; + +enum vkd3d_sm4_data_type +{ + 
VKD3D_SM4_DATA_UNORM = 0x1, + VKD3D_SM4_DATA_SNORM = 0x2, + VKD3D_SM4_DATA_INT = 0x3, + VKD3D_SM4_DATA_UINT = 0x4, + VKD3D_SM4_DATA_FLOAT = 0x5, + VKD3D_SM4_DATA_MIXED = 0x6, + VKD3D_SM4_DATA_DOUBLE = 0x7, + VKD3D_SM4_DATA_CONTINUED = 0x8, + VKD3D_SM4_DATA_UNUSED = 0x9, +}; + +enum vkd3d_sm4_sampler_mode +{ + VKD3D_SM4_SAMPLER_DEFAULT = 0x0, + VKD3D_SM4_SAMPLER_COMPARISON = 0x1, +}; + +enum vkd3d_sm4_shader_data_type +{ + VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, + VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, +}; + +#endif /* __VKD3D_SM4_H */ diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c new file mode 100644 index 00000000000..c87a7b80e56 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -0,0 +1,10006 @@ +/* + * Copyright 2017 Józef Kucia for CodeWeavers + * Copyright 2021 Conor McCarthy for Codeweavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" +#include "wine/rbtree.h" + +#include +#include + +#ifdef HAVE_SPIRV_UNIFIED1_SPIRV_H +# include "spirv/unified1/spirv.h" +#else +# include "vulkan/spirv.h" +#endif /* HAVE_SPIRV_UNIFIED1_SPIRV_H */ +#ifdef HAVE_SPIRV_UNIFIED1_GLSL_STD_450_H +# include "spirv/unified1/GLSL.std.450.h" +#else +# include "vulkan/GLSL.std.450.h" +#endif /* HAVE_SPIRV_UNIFIED1_GLSL_STD_450_H */ + +#ifdef HAVE_SPIRV_TOOLS +# include "spirv-tools/libspirv.h" + +static spv_target_env spv_target_env_from_vkd3d(enum vkd3d_shader_spirv_environment environment) +{ + switch (environment) + { + case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: + return SPV_ENV_OPENGL_4_5; + case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: + return SPV_ENV_VULKAN_1_0; + default: + ERR("Invalid environment %#x.\n", environment); + return SPV_ENV_VULKAN_1_0; + } +} + +static uint32_t get_binary_to_text_options(enum vkd3d_shader_compile_option_formatting_flags formatting) +{ + uint32_t out = 0; + unsigned int i; + + static const struct + { + enum vkd3d_shader_compile_option_formatting_flags vkd3d; + uint32_t spv; + bool invert; + } + valuemap[] = + { + {VKD3D_SHADER_COMPILE_OPTION_FORMATTING_COLOUR, SPV_BINARY_TO_TEXT_OPTION_COLOR }, + {VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT, SPV_BINARY_TO_TEXT_OPTION_INDENT }, + {VKD3D_SHADER_COMPILE_OPTION_FORMATTING_OFFSETS, SPV_BINARY_TO_TEXT_OPTION_SHOW_BYTE_OFFSET}, + {VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER, SPV_BINARY_TO_TEXT_OPTION_NO_HEADER, true}, + {VKD3D_SHADER_COMPILE_OPTION_FORMATTING_RAW_IDS, SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES, true}, + }; + + for (i = 0; i < ARRAY_SIZE(valuemap); ++i) + { + if (valuemap[i].invert == !(formatting & valuemap[i].vkd3d)) + out |= valuemap[i].spv; + } + + return 
out; +} + +static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment, + enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) +{ + spv_diagnostic diagnostic = NULL; + spv_text text = NULL; + spv_context context; + spv_result_t spvret; + enum vkd3d_result result = VKD3D_OK; + + context = spvContextCreate(spv_target_env_from_vkd3d(environment)); + + if (!(spvret = spvBinaryToText(context, spirv->code, spirv->size / sizeof(uint32_t), + get_binary_to_text_options(formatting), &text, &diagnostic))) + { + void *code = vkd3d_malloc(text->length); + if (code) + { + memcpy(code, text->str, text->length); + out->size = text->length; + out->code = code; + } + else + result = VKD3D_ERROR_OUT_OF_MEMORY; + } + else + { + FIXME("Failed to convert SPIR-V binary to text, ret %d.\n", spvret); + FIXME("Diagnostic message: %s.\n", debugstr_a(diagnostic->error)); + result = VKD3D_ERROR; + } + + spvTextDestroy(text); + spvDiagnosticDestroy(diagnostic); + spvContextDestroy(context); + + return result; +} + +static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment) +{ + static const enum vkd3d_shader_compile_option_formatting_flags formatting + = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; + struct vkd3d_shader_code text; + + if (!vkd3d_spirv_binary_to_text(spirv, environment, formatting, &text)) + { + vkd3d_shader_trace_text(text.code, text.size); + vkd3d_shader_free_shader_code(&text); + } +} + +static void vkd3d_spirv_validate(const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment) +{ + spv_diagnostic diagnostic = NULL; + spv_context context; + spv_result_t ret; + + context = spvContextCreate(spv_target_env_from_vkd3d(environment)); + + if ((ret = spvValidateBinary(context, spirv->code, spirv->size / sizeof(uint32_t), + 
&diagnostic))) + { + FIXME("Failed to validate SPIR-V binary, ret %d.\n", ret); + FIXME("Diagnostic message: %s.\n", debugstr_a(diagnostic->error)); + } + + spvDiagnosticDestroy(diagnostic); + spvContextDestroy(context); +} + +#else + +static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment, + enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) +{ + return VKD3D_ERROR; +} +static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment) {} +static void vkd3d_spirv_validate(const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment) {} + +#endif /* HAVE_SPIRV_TOOLS */ + +static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, + unsigned int index) +{ + switch (sysval) + { + case VKD3D_SHADER_SV_NONE: + return VKD3D_SIV_NONE; + case VKD3D_SHADER_SV_POSITION: + return VKD3D_SIV_POSITION; + case VKD3D_SHADER_SV_CLIP_DISTANCE: + return VKD3D_SIV_CLIP_DISTANCE; + case VKD3D_SHADER_SV_CULL_DISTANCE: + return VKD3D_SIV_CULL_DISTANCE; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: + return VKD3D_SIV_QUAD_U0_TESS_FACTOR + index; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: + return VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR + index; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: + return VKD3D_SIV_TRIANGLE_U_TESS_FACTOR + index; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: + return VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR; + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: + return VKD3D_SIV_LINE_DETAIL_TESS_FACTOR; + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: + return VKD3D_SIV_LINE_DENSITY_TESS_FACTOR; + default: + FIXME("Unhandled sysval %#x, index %u.\n", sysval, index); + return VKD3D_SIV_NONE; + } +} + +static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) +{ + return 
vkd3d_siv_from_sysval_indexed(sysval, 0); +} + +#define VKD3D_SPIRV_VERSION 0x00010000 +#define VKD3D_SPIRV_GENERATOR_ID 18 +#define VKD3D_SPIRV_GENERATOR_VERSION 3 +#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) + +struct vkd3d_spirv_stream +{ + uint32_t *words; + size_t capacity; + size_t word_count; + + struct list inserted_chunks; +}; + +static void vkd3d_spirv_stream_init(struct vkd3d_spirv_stream *stream) +{ + stream->capacity = 256; + if (!(stream->words = vkd3d_calloc(stream->capacity, sizeof(*stream->words)))) + stream->capacity = 0; + stream->word_count = 0; + + list_init(&stream->inserted_chunks); +} + +struct vkd3d_spirv_chunk +{ + struct list entry; + size_t location; + size_t word_count; + uint32_t words[]; +}; + +static void vkd3d_spirv_stream_clear(struct vkd3d_spirv_stream *stream) +{ + struct vkd3d_spirv_chunk *c1, *c2; + + stream->word_count = 0; + + LIST_FOR_EACH_ENTRY_SAFE(c1, c2, &stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) + vkd3d_free(c1); + + list_init(&stream->inserted_chunks); +} + +static void vkd3d_spirv_stream_free(struct vkd3d_spirv_stream *stream) +{ + vkd3d_free(stream->words); + + vkd3d_spirv_stream_clear(stream); +} + +static size_t vkd3d_spirv_stream_current_location(struct vkd3d_spirv_stream *stream) +{ + return stream->word_count; +} + +static void vkd3d_spirv_stream_insert(struct vkd3d_spirv_stream *stream, + size_t location, const uint32_t *words, unsigned int word_count) +{ + struct vkd3d_spirv_chunk *chunk, *current; + + if (!(chunk = vkd3d_malloc(offsetof(struct vkd3d_spirv_chunk, words[word_count])))) + return; + + chunk->location = location; + chunk->word_count = word_count; + memcpy(chunk->words, words, word_count * sizeof(*words)); + + LIST_FOR_EACH_ENTRY(current, &stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) + { + if (current->location > location) + { + list_add_before(¤t->entry, &chunk->entry); + return; + } + } + + 
list_add_tail(&stream->inserted_chunks, &chunk->entry); +} + +static bool vkd3d_spirv_stream_append(struct vkd3d_spirv_stream *dst_stream, + const struct vkd3d_spirv_stream *src_stream) +{ + size_t word_count, src_word_count = src_stream->word_count; + struct vkd3d_spirv_chunk *chunk; + size_t src_location = 0; + + assert(list_empty(&dst_stream->inserted_chunks)); + + LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) + src_word_count += chunk->word_count; + + if (!vkd3d_array_reserve((void **)&dst_stream->words, &dst_stream->capacity, + dst_stream->word_count + src_word_count, sizeof(*dst_stream->words))) + return false; + + assert(dst_stream->word_count + src_word_count <= dst_stream->capacity); + LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) + { + assert(src_location <= chunk->location); + word_count = chunk->location - src_location; + memcpy(&dst_stream->words[dst_stream->word_count], &src_stream->words[src_location], + word_count * sizeof(*src_stream->words)); + dst_stream->word_count += word_count; + src_location += word_count; + assert(src_location == chunk->location); + + memcpy(&dst_stream->words[dst_stream->word_count], chunk->words, + chunk->word_count * sizeof(*chunk->words)); + dst_stream->word_count += chunk->word_count; + } + + word_count = src_stream->word_count - src_location; + memcpy(&dst_stream->words[dst_stream->word_count], &src_stream->words[src_location], + word_count * sizeof(*src_stream->words)); + dst_stream->word_count += word_count; + return true; +} + +struct vkd3d_spirv_builder +{ + uint64_t capability_mask; + SpvCapability *capabilities; + size_t capabilities_size; + size_t capabilities_count; + uint32_t ext_instr_set_glsl_450; + uint32_t invocation_count; + SpvExecutionModel execution_model; + + uint32_t current_id; + uint32_t main_function_id; + struct rb_tree declarations; + uint32_t type_sampler_id; + uint32_t type_bool_id; + uint32_t type_void_id; 
+ + struct vkd3d_spirv_stream debug_stream; /* debug instructions */ + struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */ + struct vkd3d_spirv_stream global_stream; /* types, constants, global variables */ + struct vkd3d_spirv_stream function_stream; /* function definitions */ + + struct vkd3d_spirv_stream execution_mode_stream; /* execution mode instructions */ + + struct vkd3d_spirv_stream original_function_stream; + struct vkd3d_spirv_stream insertion_stream; + size_t insertion_location; + + size_t main_function_location; + + /* entry point interface */ + uint32_t *iface; + size_t iface_capacity; + size_t iface_element_count; +}; + +static uint32_t vkd3d_spirv_alloc_id(struct vkd3d_spirv_builder *builder) +{ + return builder->current_id++; +} + +static bool vkd3d_spirv_capability_is_enabled(struct vkd3d_spirv_builder *builder, + SpvCapability cap) +{ + size_t i; + + if (cap < sizeof(builder->capability_mask) * CHAR_BIT) + return (builder->capability_mask >> cap) & 1; + + for (i = 0; i < builder->capabilities_count; ++i) + if (builder->capabilities[i] == cap) + return true; + + return false; +} + +static void vkd3d_spirv_enable_capability(struct vkd3d_spirv_builder *builder, + SpvCapability cap) +{ + if (cap < sizeof(builder->capability_mask) * CHAR_BIT) + { + builder->capability_mask |= 1ull << cap; + return; + } + + if (vkd3d_spirv_capability_is_enabled(builder, cap)) + return; + + vkd3d_array_reserve((void **)&builder->capabilities, &builder->capabilities_size, + builder->capabilities_count + 1, sizeof(*builder->capabilities)); + + builder->capabilities[builder->capabilities_count++] = cap; +} + +static uint32_t vkd3d_spirv_get_glsl_std450_instr_set(struct vkd3d_spirv_builder *builder) +{ + if (!builder->ext_instr_set_glsl_450) + builder->ext_instr_set_glsl_450 = vkd3d_spirv_alloc_id(builder); + + return builder->ext_instr_set_glsl_450; +} + +static void vkd3d_spirv_add_iface_variable(struct vkd3d_spirv_builder *builder, + uint32_t id) 
+{ + if (!vkd3d_array_reserve((void **)&builder->iface, &builder->iface_capacity, + builder->iface_element_count + 1, sizeof(*builder->iface))) + return; + + builder->iface[builder->iface_element_count++] = id; +} + +static void vkd3d_spirv_set_execution_model(struct vkd3d_spirv_builder *builder, + SpvExecutionModel model) +{ + builder->execution_model = model; + + switch (model) + { + case SpvExecutionModelVertex: + case SpvExecutionModelFragment: + case SpvExecutionModelGLCompute: + vkd3d_spirv_enable_capability(builder, SpvCapabilityShader); + break; + case SpvExecutionModelTessellationControl: + case SpvExecutionModelTessellationEvaluation: + vkd3d_spirv_enable_capability(builder, SpvCapabilityTessellation); + break; + case SpvExecutionModelGeometry: + vkd3d_spirv_enable_capability(builder, SpvCapabilityGeometry); + break; + default: + ERR("Unhandled execution model %#x.\n", model); + } +} + +static uint32_t vkd3d_spirv_opcode_word(SpvOp op, unsigned int word_count) +{ + assert(!(op & ~SpvOpCodeMask)); + return (word_count << SpvWordCountShift) | op; +} + +static void vkd3d_spirv_build_word(struct vkd3d_spirv_stream *stream, uint32_t word) +{ + if (!vkd3d_array_reserve((void **)&stream->words, &stream->capacity, + stream->word_count + 1, sizeof(*stream->words))) + return; + + stream->words[stream->word_count++] = word; +} + +static unsigned int vkd3d_spirv_string_word_count(const char *str) +{ + return align(strlen(str) + 1, sizeof(uint32_t)) / sizeof(uint32_t); +} + +static void vkd3d_spirv_build_string(struct vkd3d_spirv_stream *stream, + const char *str, unsigned int word_count) +{ + unsigned int word_idx, i; + const char *ptr = str; + + for (word_idx = 0; word_idx < word_count; ++word_idx) + { + uint32_t word = 0; + for (i = 0; i < sizeof(uint32_t) && *ptr; ++i) + word |= (uint32_t)*ptr++ << (8 * i); + vkd3d_spirv_build_word(stream, word); + } +} + +typedef uint32_t (*vkd3d_spirv_build_pfn)(struct vkd3d_spirv_builder *builder); +typedef uint32_t 
(*vkd3d_spirv_build1_pfn)(struct vkd3d_spirv_builder *builder, + uint32_t operand0); +typedef uint32_t (*vkd3d_spirv_build1v_pfn)(struct vkd3d_spirv_builder *builder, + uint32_t operand0, const uint32_t *operands, unsigned int operand_count); +typedef uint32_t (*vkd3d_spirv_build2_pfn)(struct vkd3d_spirv_builder *builder, + uint32_t operand0, uint32_t operand1); +typedef uint32_t (*vkd3d_spirv_build7_pfn)(struct vkd3d_spirv_builder *builder, + uint32_t operand0, uint32_t operand1, uint32_t operand2, uint32_t operand3, + uint32_t operand4, uint32_t operand5, uint32_t operand6); + +static uint32_t vkd3d_spirv_build_once(struct vkd3d_spirv_builder *builder, + uint32_t *id, vkd3d_spirv_build_pfn build_pfn) +{ + if (!(*id)) + *id = build_pfn(builder); + return *id; +} + +#define MAX_SPIRV_DECLARATION_PARAMETER_COUNT 7 + +struct vkd3d_spirv_declaration +{ + struct rb_entry entry; + + SpvOp op; + unsigned int parameter_count; + uint32_t parameters[MAX_SPIRV_DECLARATION_PARAMETER_COUNT]; + uint32_t id; +}; + +static int vkd3d_spirv_declaration_compare(const void *key, const struct rb_entry *e) +{ + const struct vkd3d_spirv_declaration *a = key; + const struct vkd3d_spirv_declaration *b = RB_ENTRY_VALUE(e, const struct vkd3d_spirv_declaration, entry); + int ret; + + if ((ret = vkd3d_u32_compare(a->op, b->op))) + return ret; + if ((ret = vkd3d_u32_compare(a->parameter_count, b->parameter_count))) + return ret; + assert(a->parameter_count <= ARRAY_SIZE(a->parameters)); + return memcmp(&a->parameters, &b->parameters, a->parameter_count * sizeof(*a->parameters)); +} + +static void vkd3d_spirv_declaration_free(struct rb_entry *entry, void *context) +{ + struct vkd3d_spirv_declaration *d = RB_ENTRY_VALUE(entry, struct vkd3d_spirv_declaration, entry); + + vkd3d_free(d); +} + +static void vkd3d_spirv_insert_declaration(struct vkd3d_spirv_builder *builder, + const struct vkd3d_spirv_declaration *declaration) +{ + struct vkd3d_spirv_declaration *d; + + 
assert(declaration->parameter_count <= ARRAY_SIZE(declaration->parameters)); + + if (!(d = vkd3d_malloc(sizeof(*d)))) + return; + memcpy(d, declaration, sizeof(*d)); + if (rb_put(&builder->declarations, d, &d->entry) == -1) + { + ERR("Failed to insert declaration entry.\n"); + vkd3d_free(d); + } +} + +static uint32_t vkd3d_spirv_build_once1(struct vkd3d_spirv_builder *builder, + SpvOp op, uint32_t operand0, vkd3d_spirv_build1_pfn build_pfn) +{ + struct vkd3d_spirv_declaration declaration; + struct rb_entry *entry; + + declaration.op = op; + declaration.parameter_count = 1; + declaration.parameters[0] = operand0; + + if ((entry = rb_get(&builder->declarations, &declaration))) + return RB_ENTRY_VALUE(entry, struct vkd3d_spirv_declaration, entry)->id; + + declaration.id = build_pfn(builder, operand0); + vkd3d_spirv_insert_declaration(builder, &declaration); + return declaration.id; +} + +static uint32_t vkd3d_spirv_build_once1v(struct vkd3d_spirv_builder *builder, + SpvOp op, uint32_t operand0, const uint32_t *operands, unsigned int operand_count, + vkd3d_spirv_build1v_pfn build_pfn) +{ + struct vkd3d_spirv_declaration declaration; + unsigned int i, param_idx = 0; + struct rb_entry *entry; + + if (operand_count >= ARRAY_SIZE(declaration.parameters)) + { + WARN("Unsupported parameter count %u (opcode %#x).\n", operand_count + 1, op); + return build_pfn(builder, operand0, operands, operand_count); + } + + declaration.op = op; + declaration.parameters[param_idx++] = operand0; + for (i = 0; i < operand_count; ++i) + declaration.parameters[param_idx++] = operands[i]; + declaration.parameter_count = param_idx; + + if ((entry = rb_get(&builder->declarations, &declaration))) + return RB_ENTRY_VALUE(entry, struct vkd3d_spirv_declaration, entry)->id; + + declaration.id = build_pfn(builder, operand0, operands, operand_count); + vkd3d_spirv_insert_declaration(builder, &declaration); + return declaration.id; +} + +static uint32_t vkd3d_spirv_build_once2(struct vkd3d_spirv_builder 
*builder, + SpvOp op, uint32_t operand0, uint32_t operand1, vkd3d_spirv_build2_pfn build_pfn) +{ + struct vkd3d_spirv_declaration declaration; + struct rb_entry *entry; + + declaration.op = op; + declaration.parameter_count = 2; + declaration.parameters[0] = operand0; + declaration.parameters[1] = operand1; + + if ((entry = rb_get(&builder->declarations, &declaration))) + return RB_ENTRY_VALUE(entry, struct vkd3d_spirv_declaration, entry)->id; + + declaration.id = build_pfn(builder, operand0, operand1); + vkd3d_spirv_insert_declaration(builder, &declaration); + return declaration.id; +} + +static uint32_t vkd3d_spirv_build_once7(struct vkd3d_spirv_builder *builder, + SpvOp op, const uint32_t *operands, vkd3d_spirv_build7_pfn build_pfn) +{ + struct vkd3d_spirv_declaration declaration; + struct rb_entry *entry; + + declaration.op = op; + declaration.parameter_count = 7; + memcpy(&declaration.parameters, operands, declaration.parameter_count * sizeof(*operands)); + + if ((entry = rb_get(&builder->declarations, &declaration))) + return RB_ENTRY_VALUE(entry, struct vkd3d_spirv_declaration, entry)->id; + + declaration.id = build_pfn(builder, operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5], operands[6]); + vkd3d_spirv_insert_declaration(builder, &declaration); + return declaration.id; +} + +/* + * vkd3d_spirv_build_op[1-3][v]() + * vkd3d_spirv_build_op_[t][r][1-3][v]() + * + * t - result type + * r - result id + * 1-3 - the number of operands + * v - variable number of operands + */ +static void vkd3d_spirv_build_op(struct vkd3d_spirv_stream *stream, SpvOp op) +{ + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(op, 1)); +} + +static void vkd3d_spirv_build_op1(struct vkd3d_spirv_stream *stream, + SpvOp op, uint32_t operand) +{ + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(op, 2)); + vkd3d_spirv_build_word(stream, operand); +} + +static void vkd3d_spirv_build_op1v(struct vkd3d_spirv_stream *stream, + SpvOp op, uint32_t 
operand0, const uint32_t *operands, unsigned int operand_count) +{ + unsigned int i; + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(op, 2 + operand_count)); + vkd3d_spirv_build_word(stream, operand0); + for (i = 0; i < operand_count; ++i) + vkd3d_spirv_build_word(stream, operands[i]); +} + +static void vkd3d_spirv_build_op2v(struct vkd3d_spirv_stream *stream, + SpvOp op, uint32_t operand0, uint32_t operand1, + const uint32_t *operands, unsigned int operand_count) +{ + unsigned int i; + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(op, 3 + operand_count)); + vkd3d_spirv_build_word(stream, operand0); + vkd3d_spirv_build_word(stream, operand1); + for (i = 0; i < operand_count; ++i) + vkd3d_spirv_build_word(stream, operands[i]); +} + +static void vkd3d_spirv_build_op3v(struct vkd3d_spirv_stream *stream, + SpvOp op, uint32_t operand0, uint32_t operand1, uint32_t operand2, + const uint32_t *operands, unsigned int operand_count) +{ + unsigned int i; + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(op, 4 + operand_count)); + vkd3d_spirv_build_word(stream, operand0); + vkd3d_spirv_build_word(stream, operand1); + vkd3d_spirv_build_word(stream, operand2); + for (i = 0; i < operand_count; ++i) + vkd3d_spirv_build_word(stream, operands[i]); +} + +static void vkd3d_spirv_build_op2(struct vkd3d_spirv_stream *stream, + SpvOp op, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op2v(stream, op, operand0, operand1, NULL, 0); +} + +static void vkd3d_spirv_build_op3(struct vkd3d_spirv_stream *stream, + SpvOp op, uint32_t operand0, uint32_t operand1, uint32_t operand2) +{ + return vkd3d_spirv_build_op2v(stream, op, operand0, operand1, &operand2, 1); +} + +static uint32_t vkd3d_spirv_build_op_rv(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, + const uint32_t *operands, unsigned int operand_count) +{ + uint32_t result_id = vkd3d_spirv_alloc_id(builder); + vkd3d_spirv_build_op1v(stream, op, result_id, 
operands, operand_count); + return result_id; +} + +static uint32_t vkd3d_spirv_build_op_r(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op) +{ + return vkd3d_spirv_build_op_rv(builder, stream, op, NULL, 0); +} + +static uint32_t vkd3d_spirv_build_op_r1(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t operand0) +{ + return vkd3d_spirv_build_op_rv(builder, stream, op, &operand0, 1); +} + +static uint32_t vkd3d_spirv_build_op_r2(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t operand0, uint32_t operand1) +{ + uint32_t operands[] = {operand0, operand1}; + return vkd3d_spirv_build_op_rv(builder, stream, op, operands, ARRAY_SIZE(operands)); +} + +static uint32_t vkd3d_spirv_build_op_r1v(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t operand0, + const uint32_t *operands, unsigned int operand_count) +{ + uint32_t result_id = vkd3d_spirv_alloc_id(builder); + vkd3d_spirv_build_op2v(stream, op, result_id, operand0, operands, operand_count); + return result_id; +} + +static uint32_t vkd3d_spirv_build_op_trv(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t result_type, + const uint32_t *operands, unsigned int operand_count) +{ + uint32_t result_id = vkd3d_spirv_alloc_id(builder); + vkd3d_spirv_build_op2v(stream, op, result_type, result_id, operands, operand_count); + return result_id; +} + +static uint32_t vkd3d_spirv_build_op_tr(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t result_type) +{ + return vkd3d_spirv_build_op_trv(builder, stream, op, result_type, NULL, 0); +} + +static uint32_t vkd3d_spirv_build_op_tr1(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t result_type, + uint32_t operand0) +{ + return vkd3d_spirv_build_op_trv(builder, stream, op, result_type, &operand0, 1); 
+} + +static uint32_t vkd3d_spirv_build_op_tr2(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t result_type, + uint32_t operand0, uint32_t operand1) +{ + uint32_t operands[] = {operand0, operand1}; + return vkd3d_spirv_build_op_trv(builder, stream, op, result_type, + operands, ARRAY_SIZE(operands)); +} + +static uint32_t vkd3d_spirv_build_op_tr3(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t result_type, + uint32_t operand0, uint32_t operand1, uint32_t operand2) +{ + uint32_t operands[] = {operand0, operand1, operand2}; + return vkd3d_spirv_build_op_trv(builder, stream, op, result_type, + operands, ARRAY_SIZE(operands)); +} + +static uint32_t vkd3d_spirv_build_op_tr1v(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t result_type, + uint32_t operand0, const uint32_t *operands, unsigned int operand_count) +{ + uint32_t result_id = vkd3d_spirv_alloc_id(builder); + vkd3d_spirv_build_op3v(stream, op, result_type, result_id, operand0, operands, operand_count); + return result_id; +} + +static uint32_t vkd3d_spirv_build_op_tr2v(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, SpvOp op, uint32_t result_type, + uint32_t operand0, uint32_t operand1, const uint32_t *operands, unsigned int operand_count) +{ + uint32_t result_id = vkd3d_spirv_alloc_id(builder); + unsigned int i; + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(op, 5 + operand_count)); + vkd3d_spirv_build_word(stream, result_type); + vkd3d_spirv_build_word(stream, result_id); + vkd3d_spirv_build_word(stream, operand0); + vkd3d_spirv_build_word(stream, operand1); + for (i = 0; i < operand_count; ++i) + vkd3d_spirv_build_word(stream, operands[i]); + return result_id; +} + +static void vkd3d_spirv_begin_function_stream_insertion(struct vkd3d_spirv_builder *builder, + size_t location) +{ + assert(builder->insertion_location == ~(size_t)0); + + if 
(vkd3d_spirv_stream_current_location(&builder->function_stream) == location) + return; + + builder->original_function_stream = builder->function_stream; + builder->function_stream = builder->insertion_stream; + builder->insertion_location = location; +} + +static void vkd3d_spirv_end_function_stream_insertion(struct vkd3d_spirv_builder *builder) +{ + struct vkd3d_spirv_stream *insertion_stream = &builder->insertion_stream; + + if (builder->insertion_location == ~(size_t)0) + return; + + builder->insertion_stream = builder->function_stream; + builder->function_stream = builder->original_function_stream; + + vkd3d_spirv_stream_insert(&builder->function_stream, builder->insertion_location, + insertion_stream->words, insertion_stream->word_count); + vkd3d_spirv_stream_clear(insertion_stream); + builder->insertion_location = ~(size_t)0; +} + +struct vkd3d_spirv_op_branch_conditional +{ + uint32_t opcode; + uint32_t condition_id; + uint32_t true_label; + uint32_t false_label; +}; + +static struct vkd3d_spirv_op_branch_conditional *vkd3d_spirv_as_op_branch_conditional( + struct vkd3d_spirv_stream *stream, size_t location) +{ + return (struct vkd3d_spirv_op_branch_conditional *)&stream->words[location]; +} + +static void vkd3d_spirv_build_op_capability(struct vkd3d_spirv_stream *stream, + SpvCapability cap) +{ + vkd3d_spirv_build_op1(stream, SpvOpCapability, cap); +} + +static void vkd3d_spirv_build_op_extension(struct vkd3d_spirv_stream *stream, + const char *name) +{ + unsigned int name_size = vkd3d_spirv_string_word_count(name); + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(SpvOpExtension, 1 + name_size)); + vkd3d_spirv_build_string(stream, name, name_size); +} + +static void vkd3d_spirv_build_op_ext_inst_import(struct vkd3d_spirv_stream *stream, + uint32_t result_id, const char *name) +{ + unsigned int name_size = vkd3d_spirv_string_word_count(name); + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(SpvOpExtInstImport, 2 + name_size)); + 
vkd3d_spirv_build_word(stream, result_id); + vkd3d_spirv_build_string(stream, name, name_size); +} + +static uint32_t vkd3d_spirv_build_op_ext_inst(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t inst_set, uint32_t inst_number, + uint32_t *operands, unsigned int operand_count) +{ + return vkd3d_spirv_build_op_tr2v(builder, &builder->function_stream, + SpvOpExtInst, result_type, inst_set, inst_number, operands, operand_count); +} + +static void vkd3d_spirv_build_op_memory_model(struct vkd3d_spirv_stream *stream, + SpvAddressingModel addressing_model, SpvMemoryModel memory_model) +{ + vkd3d_spirv_build_op2(stream, SpvOpMemoryModel, addressing_model, memory_model); +} + +static void vkd3d_spirv_build_op_entry_point(struct vkd3d_spirv_stream *stream, + SpvExecutionModel model, uint32_t function_id, const char *name, + uint32_t *interface_list, unsigned int interface_size) +{ + unsigned int i, name_size = vkd3d_spirv_string_word_count(name); + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(SpvOpEntryPoint, 3 + name_size + interface_size)); + vkd3d_spirv_build_word(stream, model); + vkd3d_spirv_build_word(stream, function_id); + vkd3d_spirv_build_string(stream, name, name_size); + for (i = 0; i < interface_size; ++i) + vkd3d_spirv_build_word(stream, interface_list[i]); +} + +static void vkd3d_spirv_build_op_execution_mode(struct vkd3d_spirv_stream *stream, + uint32_t entry_point, SpvExecutionMode mode, const uint32_t *literals, unsigned int literal_count) +{ + vkd3d_spirv_build_op2v(stream, SpvOpExecutionMode, entry_point, mode, literals, literal_count); +} + +static void vkd3d_spirv_build_op_name(struct vkd3d_spirv_builder *builder, + uint32_t id, const char *fmt, ...) 
+{ + struct vkd3d_spirv_stream *stream = &builder->debug_stream; + unsigned int name_size; + char name[1024]; + va_list args; + + va_start(args, fmt); + vsnprintf(name, ARRAY_SIZE(name), fmt, args); + name[ARRAY_SIZE(name) - 1] = '\0'; + va_end(args); + + name_size = vkd3d_spirv_string_word_count(name); + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(SpvOpName, 2 + name_size)); + vkd3d_spirv_build_word(stream, id); + vkd3d_spirv_build_string(stream, name, name_size); +} + +static void vkd3d_spirv_build_op_member_name(struct vkd3d_spirv_builder *builder, + uint32_t type_id, uint32_t member, const char *fmt, ...) +{ + struct vkd3d_spirv_stream *stream = &builder->debug_stream; + unsigned int name_size; + char name[1024]; + va_list args; + + va_start(args, fmt); + vsnprintf(name, ARRAY_SIZE(name), fmt, args); + name[ARRAY_SIZE(name) - 1] = '\0'; + va_end(args); + + name_size = vkd3d_spirv_string_word_count(name); + vkd3d_spirv_build_word(stream, vkd3d_spirv_opcode_word(SpvOpMemberName, 3 + name_size)); + vkd3d_spirv_build_word(stream, type_id); + vkd3d_spirv_build_word(stream, member); + vkd3d_spirv_build_string(stream, name, name_size); +} + +static void vkd3d_spirv_build_op_decorate(struct vkd3d_spirv_builder *builder, + uint32_t target_id, SpvDecoration decoration, + uint32_t *literals, uint32_t literal_count) +{ + vkd3d_spirv_build_op2v(&builder->annotation_stream, + SpvOpDecorate, target_id, decoration, literals, literal_count); +} + +static void vkd3d_spirv_build_op_decorate1(struct vkd3d_spirv_builder *builder, + uint32_t target_id, SpvDecoration decoration, uint32_t operand0) +{ + return vkd3d_spirv_build_op_decorate(builder, target_id, decoration, &operand0, 1); +} + +static void vkd3d_spirv_build_op_member_decorate(struct vkd3d_spirv_builder *builder, + uint32_t structure_type_id, uint32_t member_idx, SpvDecoration decoration, + uint32_t *literals, uint32_t literal_count) +{ + vkd3d_spirv_build_op3v(&builder->annotation_stream, SpvOpMemberDecorate, 
+ structure_type_id, member_idx, decoration, literals, literal_count); +} + +static void vkd3d_spirv_build_op_member_decorate1(struct vkd3d_spirv_builder *builder, + uint32_t structure_type_id, uint32_t member_idx, SpvDecoration decoration, uint32_t operand0) +{ + vkd3d_spirv_build_op_member_decorate(builder, structure_type_id, member_idx, decoration, &operand0, 1); +} + +static uint32_t vkd3d_spirv_build_op_type_void(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_build_op_r(builder, &builder->global_stream, SpvOpTypeVoid); +} + +static uint32_t vkd3d_spirv_get_op_type_void(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_build_once(builder, &builder->type_void_id, vkd3d_spirv_build_op_type_void); +} + +static uint32_t vkd3d_spirv_build_op_type_bool(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_build_op_r(builder, &builder->global_stream, SpvOpTypeBool); +} + +static uint32_t vkd3d_spirv_get_op_type_bool(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_build_once(builder, &builder->type_bool_id, vkd3d_spirv_build_op_type_bool); +} + +static uint32_t vkd3d_spirv_build_op_type_float(struct vkd3d_spirv_builder *builder, + uint32_t width) +{ + return vkd3d_spirv_build_op_r1(builder, &builder->global_stream, SpvOpTypeFloat, width); +} + +static uint32_t vkd3d_spirv_get_op_type_float(struct vkd3d_spirv_builder *builder, + uint32_t width) +{ + return vkd3d_spirv_build_once1(builder, SpvOpTypeFloat, width, vkd3d_spirv_build_op_type_float); +} + +static uint32_t vkd3d_spirv_build_op_type_int(struct vkd3d_spirv_builder *builder, + uint32_t width, uint32_t signedness) +{ + return vkd3d_spirv_build_op_r2(builder, &builder->global_stream, SpvOpTypeInt, width, signedness); +} + +static uint32_t vkd3d_spirv_get_op_type_int(struct vkd3d_spirv_builder *builder, + uint32_t width, uint32_t signedness) +{ + return vkd3d_spirv_build_once2(builder, SpvOpTypeInt, width, signedness, + vkd3d_spirv_build_op_type_int); +} + +static 
uint32_t vkd3d_spirv_build_op_type_vector(struct vkd3d_spirv_builder *builder, + uint32_t component_type, uint32_t component_count) +{ + return vkd3d_spirv_build_op_r2(builder, &builder->global_stream, + SpvOpTypeVector, component_type, component_count); +} + +static uint32_t vkd3d_spirv_get_op_type_vector(struct vkd3d_spirv_builder *builder, + uint32_t component_type, uint32_t component_count) +{ + return vkd3d_spirv_build_once2(builder, SpvOpTypeVector, component_type, component_count, + vkd3d_spirv_build_op_type_vector); +} + +static uint32_t vkd3d_spirv_build_op_type_array(struct vkd3d_spirv_builder *builder, + uint32_t element_type, uint32_t length_id) +{ + return vkd3d_spirv_build_op_r2(builder, &builder->global_stream, + SpvOpTypeArray, element_type, length_id); +} + +static uint32_t vkd3d_spirv_get_op_type_array(struct vkd3d_spirv_builder *builder, + uint32_t element_type, uint32_t length_id) +{ + return vkd3d_spirv_build_once2(builder, SpvOpTypeArray, element_type, length_id, + vkd3d_spirv_build_op_type_array); +} + +static uint32_t vkd3d_spirv_build_op_type_runtime_array(struct vkd3d_spirv_builder *builder, uint32_t element_type) +{ + return vkd3d_spirv_build_op_r1(builder, &builder->global_stream, SpvOpTypeRuntimeArray, element_type); +} + +static uint32_t vkd3d_spirv_get_op_type_runtime_array(struct vkd3d_spirv_builder *builder, uint32_t element_type) +{ + return vkd3d_spirv_build_once1(builder, SpvOpTypeRuntimeArray, + element_type, vkd3d_spirv_build_op_type_runtime_array); +} + +static uint32_t vkd3d_spirv_build_op_type_struct(struct vkd3d_spirv_builder *builder, + uint32_t *members, unsigned int member_count) +{ + return vkd3d_spirv_build_op_rv(builder, &builder->global_stream, + SpvOpTypeStruct, members, member_count); +} + +static uint32_t vkd3d_spirv_build_op_type_sampler(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_build_op_r(builder, &builder->global_stream, SpvOpTypeSampler); +} + +static uint32_t 
vkd3d_spirv_get_op_type_sampler(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_build_once(builder, &builder->type_sampler_id, vkd3d_spirv_build_op_type_sampler); +} + +/* Access qualifiers are not supported. */ +static uint32_t vkd3d_spirv_build_op_type_image(struct vkd3d_spirv_builder *builder, + uint32_t sampled_type_id, SpvDim dim, uint32_t depth, uint32_t arrayed, + uint32_t ms, uint32_t sampled, SpvImageFormat format) +{ + uint32_t operands[] = {sampled_type_id, dim, depth, arrayed, ms, sampled, format}; + return vkd3d_spirv_build_op_rv(builder, &builder->global_stream, + SpvOpTypeImage, operands, ARRAY_SIZE(operands)); +} + +static uint32_t vkd3d_spirv_get_op_type_image(struct vkd3d_spirv_builder *builder, + uint32_t sampled_type_id, SpvDim dim, uint32_t depth, uint32_t arrayed, + uint32_t ms, uint32_t sampled, SpvImageFormat format) +{ + uint32_t operands[] = {sampled_type_id, dim, depth, arrayed, ms, sampled, format}; + return vkd3d_spirv_build_once7(builder, SpvOpTypeImage, operands, + vkd3d_spirv_build_op_type_image); +} + +static uint32_t vkd3d_spirv_build_op_type_sampled_image(struct vkd3d_spirv_builder *builder, + uint32_t image_type_id) +{ + return vkd3d_spirv_build_op_r1(builder, &builder->global_stream, + SpvOpTypeSampledImage, image_type_id); +} + +static uint32_t vkd3d_spirv_get_op_type_sampled_image(struct vkd3d_spirv_builder *builder, + uint32_t image_type_id) +{ + return vkd3d_spirv_build_once1(builder, SpvOpTypeSampledImage, image_type_id, + vkd3d_spirv_build_op_type_sampled_image); +} + +static uint32_t vkd3d_spirv_build_op_type_function(struct vkd3d_spirv_builder *builder, + uint32_t return_type, const uint32_t *param_types, unsigned int param_count) +{ + return vkd3d_spirv_build_op_r1v(builder, &builder->global_stream, + SpvOpTypeFunction, return_type, param_types, param_count); +} + +static uint32_t vkd3d_spirv_get_op_type_function(struct vkd3d_spirv_builder *builder, + uint32_t return_type, const uint32_t *param_types, 
unsigned int param_count) +{ + return vkd3d_spirv_build_once1v(builder, SpvOpTypeFunction, return_type, + param_types, param_count, vkd3d_spirv_build_op_type_function); +} + +static uint32_t vkd3d_spirv_build_op_type_pointer(struct vkd3d_spirv_builder *builder, + uint32_t storage_class, uint32_t type_id) +{ + return vkd3d_spirv_build_op_r2(builder, &builder->global_stream, + SpvOpTypePointer, storage_class, type_id); +} + +static uint32_t vkd3d_spirv_get_op_type_pointer(struct vkd3d_spirv_builder *builder, + uint32_t storage_class, uint32_t type_id) +{ + return vkd3d_spirv_build_once2(builder, SpvOpTypePointer, storage_class, type_id, + vkd3d_spirv_build_op_type_pointer); +} + +/* Types larger than 32-bits are not supported. */ +static uint32_t vkd3d_spirv_build_op_constant(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t value) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->global_stream, + SpvOpConstant, result_type, value); +} + +static uint32_t vkd3d_spirv_get_op_constant(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t value) +{ + return vkd3d_spirv_build_once2(builder, SpvOpConstant, result_type, value, + vkd3d_spirv_build_op_constant); +} + +static uint32_t vkd3d_spirv_build_op_constant64(struct vkd3d_spirv_builder *builder, + uint32_t result_type, const uint32_t *values, unsigned int value_count) +{ + assert(value_count == 2); + return vkd3d_spirv_build_op_trv(builder, &builder->global_stream, + SpvOpConstant, result_type, values, value_count); +} + +static uint32_t vkd3d_spirv_get_op_constant64(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint64_t value) +{ + return vkd3d_spirv_build_once1v(builder, SpvOpConstant, result_type, + (const uint32_t *)&value, 2, vkd3d_spirv_build_op_constant64); +} + +static uint32_t vkd3d_spirv_build_op_constant_composite(struct vkd3d_spirv_builder *builder, + uint32_t result_type, const uint32_t *constituents, unsigned int constituent_count) +{ + return 
vkd3d_spirv_build_op_trv(builder, &builder->global_stream, + SpvOpConstantComposite, result_type, constituents, constituent_count); +} + +static uint32_t vkd3d_spirv_get_op_constant_composite(struct vkd3d_spirv_builder *builder, + uint32_t result_type, const uint32_t *constituents, unsigned int constituent_count) +{ + return vkd3d_spirv_build_once1v(builder, SpvOpConstantComposite, result_type, + constituents, constituent_count, vkd3d_spirv_build_op_constant_composite); +} + +static uint32_t vkd3d_spirv_build_op_spec_constant(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t value) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->global_stream, + SpvOpSpecConstant, result_type, value); +} + +static uint32_t vkd3d_spirv_build_op_variable(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, uint32_t type_id, uint32_t storage_class, uint32_t initializer) +{ + return vkd3d_spirv_build_op_tr1v(builder, stream, + SpvOpVariable, type_id, storage_class, &initializer, !!initializer); +} + +static uint32_t vkd3d_spirv_build_op_function(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t result_id, uint32_t function_control, uint32_t function_type) +{ + vkd3d_spirv_build_op3v(&builder->function_stream, + SpvOpFunction, result_type, result_id, function_control, &function_type, 1); + return result_id; +} + +static uint32_t vkd3d_spirv_build_op_function_parameter(struct vkd3d_spirv_builder *builder, + uint32_t result_type) +{ + return vkd3d_spirv_build_op_tr(builder, &builder->function_stream, + SpvOpFunctionParameter, result_type); +} + +static void vkd3d_spirv_build_op_function_end(struct vkd3d_spirv_builder *builder) +{ + vkd3d_spirv_build_op(&builder->function_stream, SpvOpFunctionEnd); +} + +static uint32_t vkd3d_spirv_build_op_function_call(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t function_id, const uint32_t *arguments, unsigned int argument_count) +{ + return 
vkd3d_spirv_build_op_tr1v(builder, &builder->function_stream, + SpvOpFunctionCall, result_type, function_id, arguments, argument_count); +} + +static uint32_t vkd3d_spirv_build_op_undef(struct vkd3d_spirv_builder *builder, + struct vkd3d_spirv_stream *stream, uint32_t type_id) +{ + return vkd3d_spirv_build_op_tr(builder, stream, SpvOpUndef, type_id); +} + +static uint32_t vkd3d_spirv_build_op_access_chain(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t base_id, uint32_t *indexes, uint32_t index_count) +{ + return vkd3d_spirv_build_op_tr1v(builder, &builder->function_stream, + SpvOpAccessChain, result_type, base_id, indexes, index_count); +} + +static uint32_t vkd3d_spirv_build_op_access_chain1(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t base_id, uint32_t index) +{ + return vkd3d_spirv_build_op_access_chain(builder, result_type, base_id, &index, 1); +} + +static uint32_t vkd3d_spirv_build_op_in_bounds_access_chain(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t base_id, uint32_t *indexes, uint32_t index_count) +{ + return vkd3d_spirv_build_op_tr1v(builder, &builder->function_stream, + SpvOpInBoundsAccessChain, result_type, base_id, indexes, index_count); +} + +static uint32_t vkd3d_spirv_build_op_in_bounds_access_chain1(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t base_id, uint32_t index) +{ + return vkd3d_spirv_build_op_in_bounds_access_chain(builder, result_type, base_id, &index, 1); +} + +static uint32_t vkd3d_spirv_build_op_vector_shuffle(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t vector1_id, uint32_t vector2_id, + const uint32_t *components, uint32_t component_count) +{ + return vkd3d_spirv_build_op_tr2v(builder, &builder->function_stream, SpvOpVectorShuffle, + result_type, vector1_id, vector2_id, components, component_count); +} + +static uint32_t vkd3d_spirv_build_op_composite_construct(struct vkd3d_spirv_builder *builder, + uint32_t 
result_type, const uint32_t *constituents, unsigned int constituent_count) +{ + return vkd3d_spirv_build_op_trv(builder, &builder->function_stream, SpvOpCompositeConstruct, + result_type, constituents, constituent_count); +} + +static uint32_t vkd3d_spirv_build_op_composite_extract(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t composite_id, const uint32_t *indexes, unsigned int index_count) +{ + return vkd3d_spirv_build_op_tr1v(builder, &builder->function_stream, SpvOpCompositeExtract, + result_type, composite_id, indexes, index_count); +} + +static uint32_t vkd3d_spirv_build_op_composite_extract1(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t composite_id, uint32_t index) +{ + return vkd3d_spirv_build_op_composite_extract(builder, result_type, composite_id, &index, 1); +} + +static uint32_t vkd3d_spirv_build_op_composite_insert(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t object_id, uint32_t composite_id, + const uint32_t *indexes, unsigned int index_count) +{ + return vkd3d_spirv_build_op_tr2v(builder, &builder->function_stream, SpvOpCompositeInsert, + result_type, object_id, composite_id, indexes, index_count); +} + +static uint32_t vkd3d_spirv_build_op_composite_insert1(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t object_id, uint32_t composite_id, uint32_t index) +{ + return vkd3d_spirv_build_op_composite_insert(builder, result_type, object_id, composite_id, &index, 1); +} + +static uint32_t vkd3d_spirv_build_op_load(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t pointer_id, uint32_t memory_access) +{ + if (!memory_access) + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpLoad, + result_type, pointer_id); + else + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpLoad, + result_type, pointer_id, memory_access); +} + +static void vkd3d_spirv_build_op_store(struct vkd3d_spirv_builder *builder, + 
uint32_t pointer_id, uint32_t object_id, uint32_t memory_access) +{ + if (!memory_access) + return vkd3d_spirv_build_op2(&builder->function_stream, SpvOpStore, + pointer_id, object_id); + else + return vkd3d_spirv_build_op3(&builder->function_stream, SpvOpStore, + pointer_id, object_id, memory_access); +} + +static void vkd3d_spirv_build_op_copy_memory(struct vkd3d_spirv_builder *builder, + uint32_t target_id, uint32_t source_id, uint32_t memory_access) +{ + if (!memory_access) + return vkd3d_spirv_build_op2(&builder->function_stream, SpvOpCopyMemory, + target_id, source_id); + else + return vkd3d_spirv_build_op3(&builder->function_stream, SpvOpCopyMemory, + target_id, source_id, memory_access); +} + +static uint32_t vkd3d_spirv_build_op_select(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t condition_id, uint32_t object0_id, uint32_t object1_id) +{ + return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, + SpvOpSelect, result_type, condition_id, object0_id, object1_id); +} + +static void vkd3d_spirv_build_op_kill(struct vkd3d_spirv_builder *builder) +{ + vkd3d_spirv_build_op(&builder->function_stream, SpvOpKill); +} + +static void vkd3d_spirv_build_op_demote_to_helper_invocation(struct vkd3d_spirv_builder *builder) +{ + vkd3d_spirv_build_op(&builder->function_stream, SpvOpDemoteToHelperInvocationEXT); +} + +static void vkd3d_spirv_build_op_return(struct vkd3d_spirv_builder *builder) +{ + vkd3d_spirv_build_op(&builder->function_stream, SpvOpReturn); +} + +static uint32_t vkd3d_spirv_build_op_label(struct vkd3d_spirv_builder *builder, + uint32_t label_id) +{ + vkd3d_spirv_build_op1(&builder->function_stream, SpvOpLabel, label_id); + return label_id; +} + +/* Loop control parameters are not supported. 
*/ +static void vkd3d_spirv_build_op_loop_merge(struct vkd3d_spirv_builder *builder, + uint32_t merge_block, uint32_t continue_target, SpvLoopControlMask loop_control) +{ + vkd3d_spirv_build_op3(&builder->function_stream, SpvOpLoopMerge, + merge_block, continue_target, loop_control); +} + +static void vkd3d_spirv_build_op_selection_merge(struct vkd3d_spirv_builder *builder, + uint32_t merge_block, uint32_t selection_control) +{ + vkd3d_spirv_build_op2(&builder->function_stream, SpvOpSelectionMerge, + merge_block, selection_control); +} + +static void vkd3d_spirv_build_op_branch(struct vkd3d_spirv_builder *builder, uint32_t label) +{ + vkd3d_spirv_build_op1(&builder->function_stream, SpvOpBranch, label); +} + +/* Branch weights are not supported. */ +static void vkd3d_spirv_build_op_branch_conditional(struct vkd3d_spirv_builder *builder, + uint32_t condition, uint32_t true_label, uint32_t false_label) +{ + vkd3d_spirv_build_op3(&builder->function_stream, SpvOpBranchConditional, + condition, true_label, false_label); +} + +static void vkd3d_spirv_build_op_switch(struct vkd3d_spirv_builder *builder, + uint32_t selector, uint32_t default_label, uint32_t *targets, unsigned int target_count) +{ + vkd3d_spirv_build_op2v(&builder->function_stream, SpvOpSwitch, + selector, default_label, targets, 2 * target_count); +} + +static uint32_t vkd3d_spirv_build_op_iadd(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpIAdd, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_imul(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpIMul, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_udiv(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t 
operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpUDiv, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_umod(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpUMod, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_isub(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpISub, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_fdiv(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpFDiv, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_fnegate(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, + SpvOpFNegate, result_type, operand); +} + +static uint32_t vkd3d_spirv_build_op_snegate(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, + SpvOpSNegate, result_type, operand); +} + +static uint32_t vkd3d_spirv_build_op_and(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpBitwiseAnd, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_shift_left_logical(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t base, uint32_t shift) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpShiftLeftLogical, result_type, base, shift); +} + +static uint32_t 
vkd3d_spirv_build_op_shift_right_logical(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t base, uint32_t shift) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpShiftRightLogical, result_type, base, shift); +} + +static uint32_t vkd3d_spirv_build_op_logical_and(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpLogicalAnd, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_uless_than(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpULessThan, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_uless_than_equal(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpULessThanEqual, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_convert_utof(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t unsigned_value) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, + SpvOpConvertUToF, result_type, unsigned_value); +} + +static uint32_t vkd3d_spirv_build_op_bitcast(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, + SpvOpBitcast, result_type, operand); +} + +static uint32_t vkd3d_spirv_build_op_image_texel_pointer(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t image_id, uint32_t coordinate_id, uint32_t sample_id) +{ + return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, + SpvOpImageTexelPointer, result_type, image_id, coordinate_id, sample_id); +} + +static uint32_t 
vkd3d_spirv_build_op_sampled_image(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t image_id, uint32_t sampler_id) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpSampledImage, result_type, image_id, sampler_id); +} + +/* Emit OpImage: extract the image from a sampled image. */ static uint32_t vkd3d_spirv_build_op_image(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t sampled_image_id) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, + SpvOpImage, result_type, sampled_image_id); +} + +/* Emit an image instruction, packing the fixed operands followed by the optional image-operands mask and its operand words into a single word array. */ static uint32_t vkd3d_spirv_build_image_instruction(struct vkd3d_spirv_builder *builder, + SpvOp op, uint32_t result_type, const uint32_t *operands, unsigned int operand_count, + uint32_t image_operands_mask, const uint32_t *image_operands, unsigned int image_operand_count) +{ + unsigned int index = 0, i; + uint32_t w[10]; + + assert(operand_count <= ARRAY_SIZE(w)); + for (i = 0; i < operand_count; ++i) + w[index++] = operands[i]; + + if (image_operands_mask) + { + assert(index + 1 + image_operand_count <= ARRAY_SIZE(w)); + w[index++] = image_operands_mask; + for (i = 0; i < image_operand_count; ++i) + w[index++] = image_operands[i]; + } + + return vkd3d_spirv_build_op_trv(builder, &builder->function_stream, + op, result_type, w, index); +} + +/* Emit OpImageSample{Implicit,Explicit}Lod; explicit-LOD sampling requires a Lod or Grad image operand. */ static uint32_t vkd3d_spirv_build_op_image_sample(struct vkd3d_spirv_builder *builder, + SpvOp op, uint32_t result_type, uint32_t sampled_image_id, uint32_t coordinate_id, + uint32_t image_operands_mask, const uint32_t *image_operands, unsigned int image_operand_count) +{ + const uint32_t operands[] = {sampled_image_id, coordinate_id}; + + if (op == SpvOpImageSampleExplicitLod) + assert(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); + else + assert(op == SpvOpImageSampleImplicitLod); + + return vkd3d_spirv_build_image_instruction(builder, op, result_type, + operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); +} + +/* Emit OpImageSampleDref{Implicit,Explicit}Lod (depth-comparison sampling). */ static uint32_t 
vkd3d_spirv_build_op_image_sample_dref(struct vkd3d_spirv_builder *builder, + SpvOp op, uint32_t result_type, uint32_t sampled_image_id, uint32_t coordinate_id, uint32_t dref_id, + uint32_t image_operands_mask, const uint32_t *image_operands, unsigned int image_operand_count) +{ + const uint32_t operands[] = {sampled_image_id, coordinate_id, dref_id}; + + if (op == SpvOpImageSampleDrefExplicitLod) + assert(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); + else + assert(op == SpvOpImageSampleDrefImplicitLod); + + return vkd3d_spirv_build_image_instruction(builder, op, result_type, + operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); +} + +/* Emit OpImageGather for the selected component. */ static uint32_t vkd3d_spirv_build_op_image_gather(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t sampled_image_id, uint32_t coordinate_id, uint32_t component_id, + uint32_t image_operands_mask, const uint32_t *image_operands, unsigned int image_operand_count) +{ + const uint32_t operands[] = {sampled_image_id, coordinate_id, component_id}; + return vkd3d_spirv_build_image_instruction(builder, SpvOpImageGather, result_type, + operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); +} + +/* Emit OpImageDrefGather (depth-comparison gather). */ static uint32_t vkd3d_spirv_build_op_image_dref_gather(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t sampled_image_id, uint32_t coordinate_id, uint32_t dref_id, + uint32_t image_operands_mask, const uint32_t *image_operands, unsigned int image_operand_count) +{ + const uint32_t operands[] = {sampled_image_id, coordinate_id, dref_id}; + return vkd3d_spirv_build_image_instruction(builder, SpvOpImageDrefGather, result_type, + operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); +} + +/* Emit OpImageFetch (texel fetch without a sampler). */ static uint32_t vkd3d_spirv_build_op_image_fetch(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t image_id, uint32_t coordinate_id, + uint32_t image_operands_mask, 
const uint32_t *image_operands, unsigned int image_operand_count) +{ + const uint32_t operands[] = {image_id, coordinate_id}; + return vkd3d_spirv_build_image_instruction(builder, SpvOpImageFetch, result_type, + operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); +} + +/* Emit OpImageRead (storage image load). */ static uint32_t vkd3d_spirv_build_op_image_read(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t image_id, uint32_t coordinate_id, + uint32_t image_operands_mask, const uint32_t *image_operands, unsigned int image_operand_count) +{ + const uint32_t operands[] = {image_id, coordinate_id}; + return vkd3d_spirv_build_image_instruction(builder, SpvOpImageRead, result_type, + operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); +} + +/* Emit OpImageWrite (storage image store); image operands are not yet supported here. */ static void vkd3d_spirv_build_op_image_write(struct vkd3d_spirv_builder *builder, + uint32_t image_id, uint32_t coordinate_id, uint32_t texel_id, + uint32_t image_operands, const uint32_t *operands, unsigned int operand_count) +{ + if (image_operands) + FIXME("Image operands not supported.\n"); + + vkd3d_spirv_build_op3(&builder->function_stream, SpvOpImageWrite, + image_id, coordinate_id, texel_id); +} + +/* Emit OpArrayLength for a structure's runtime array member. */ static uint32_t vkd3d_spirv_build_op_array_length(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t struct_id, uint32_t member_id) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpArrayLength, result_type, struct_id, member_id); +} + +/* Emit OpImageQuerySizeLod (size at a given mip level). */ static uint32_t vkd3d_spirv_build_op_image_query_size_lod(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t image_id, uint32_t lod_id) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpImageQuerySizeLod, result_type, image_id, lod_id); +} + +/* Emit OpImageQuerySize (size of images without mip levels). */ static uint32_t vkd3d_spirv_build_op_image_query_size(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t image_id) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, + 
SpvOpImageQuerySize, result_type, image_id); +} + +/* Emit OpImageQueryLevels (mip level count). */ static uint32_t vkd3d_spirv_build_op_image_query_levels(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t image_id) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, + SpvOpImageQueryLevels, result_type, image_id); +} + +/* Emit OpImageQuerySamples (sample count of a multisampled image). */ static uint32_t vkd3d_spirv_build_op_image_query_samples(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t image_id) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, + SpvOpImageQuerySamples, result_type, image_id); +} + +/* Emit OpImageQueryLod (implicit LOD that would be sampled). */ static uint32_t vkd3d_spirv_build_op_image_query_lod(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t image_id, uint32_t coordinate_id) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpImageQueryLod, result_type, image_id, coordinate_id); +} + +/* Emit OpEmitVertex (geometry shaders). */ static void vkd3d_spirv_build_op_emit_vertex(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_build_op(&builder->function_stream, SpvOpEmitVertex); +} + +/* Emit OpEndPrimitive (geometry shaders). */ static void vkd3d_spirv_build_op_end_primitive(struct vkd3d_spirv_builder *builder) +{ + return vkd3d_spirv_build_op(&builder->function_stream, SpvOpEndPrimitive); +} + +/* Emit OpControlBarrier with explicit execution/memory scopes and semantics ids. */ static void vkd3d_spirv_build_op_control_barrier(struct vkd3d_spirv_builder *builder, + uint32_t execution_id, uint32_t memory_id, uint32_t memory_semantics_id) +{ + vkd3d_spirv_build_op3(&builder->function_stream, + SpvOpControlBarrier, execution_id, memory_id, memory_semantics_id); +} + +/* Emit OpMemoryBarrier. */ static void vkd3d_spirv_build_op_memory_barrier(struct vkd3d_spirv_builder *builder, + uint32_t memory_id, uint32_t memory_semantics_id) +{ + vkd3d_spirv_build_op2(&builder->function_stream, + SpvOpMemoryBarrier, memory_id, memory_semantics_id); +} + +/* Emit a single-operand GLSL.std.450 extended instruction. */ static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, + enum GLSLstd450 op, uint32_t result_type, uint32_t operand) +{ + uint32_t id = vkd3d_spirv_get_glsl_std450_instr_set(builder); + return 
vkd3d_spirv_build_op_ext_inst(builder, result_type, id, op, &operand, 1); +} + +/* GLSL.std.450 FAbs. */ static uint32_t vkd3d_spirv_build_op_glsl_std450_fabs(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand) +{ + return vkd3d_spirv_build_op_glsl_std450_tr1(builder, GLSLstd450FAbs, result_type, operand); +} + +/* GLSL.std.450 Sin. */ static uint32_t vkd3d_spirv_build_op_glsl_std450_sin(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand) +{ + return vkd3d_spirv_build_op_glsl_std450_tr1(builder, GLSLstd450Sin, result_type, operand); +} + +/* GLSL.std.450 Cos. */ static uint32_t vkd3d_spirv_build_op_glsl_std450_cos(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand) +{ + return vkd3d_spirv_build_op_glsl_std450_tr1(builder, GLSLstd450Cos, result_type, operand); +} + +/* GLSL.std.450 NClamp (NaN-aware clamp of x to [min, max]). */ static uint32_t vkd3d_spirv_build_op_glsl_std450_nclamp(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t x, uint32_t min, uint32_t max) +{ + uint32_t glsl_std450_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); + uint32_t operands[] = {x, min, max}; + return vkd3d_spirv_build_op_ext_inst(builder, result_type, glsl_std450_id, + GLSLstd450NClamp, operands, ARRAY_SIZE(operands)); +} + +/* Map a vkd3d component type and count to a SPIR-V scalar or vector type id. NOTE(review): the break statements after each return are unreachable. */ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, + enum vkd3d_shader_component_type component_type, unsigned int component_count) +{ + uint32_t scalar_id; + + if (component_count == 1) + { + switch (component_type) + { + case VKD3D_SHADER_COMPONENT_VOID: + return vkd3d_spirv_get_op_type_void(builder); + break; + case VKD3D_SHADER_COMPONENT_FLOAT: + return vkd3d_spirv_get_op_type_float(builder, 32); + break; + case VKD3D_SHADER_COMPONENT_INT: + case VKD3D_SHADER_COMPONENT_UINT: + return vkd3d_spirv_get_op_type_int(builder, 32, component_type == VKD3D_SHADER_COMPONENT_INT); + break; + case VKD3D_SHADER_COMPONENT_BOOL: + return vkd3d_spirv_get_op_type_bool(builder); + break; + case VKD3D_SHADER_COMPONENT_DOUBLE: + return vkd3d_spirv_get_op_type_float(builder, 64); + 
default: + FIXME("Unhandled component type %#x.\n", component_type); + return 0; + } + } + else + { + assert(component_type != VKD3D_SHADER_COMPONENT_VOID); + scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); + } +} + +/* Initialise the builder's instruction streams, id allocator and declaration tree, and name the main function after entry_point. Id 0 is reserved, so allocation starts at 1. */ static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) +{ + vkd3d_spirv_stream_init(&builder->debug_stream); + vkd3d_spirv_stream_init(&builder->annotation_stream); + vkd3d_spirv_stream_init(&builder->global_stream); + vkd3d_spirv_stream_init(&builder->function_stream); + vkd3d_spirv_stream_init(&builder->execution_mode_stream); + + vkd3d_spirv_stream_init(&builder->insertion_stream); + builder->insertion_location = ~(size_t)0; + + builder->current_id = 1; + + rb_init(&builder->declarations, vkd3d_spirv_declaration_compare); + + builder->main_function_id = vkd3d_spirv_alloc_id(builder); + vkd3d_spirv_build_op_name(builder, builder->main_function_id, entry_point); +} + +/* Open the main function: declare the void(void) function type, emit OpFunction and an initial OpLabel, and record where the body starts in the function stream. */ static void vkd3d_spirv_builder_begin_main_function(struct vkd3d_spirv_builder *builder) +{ + uint32_t void_id, function_type_id; + + void_id = vkd3d_spirv_get_op_type_void(builder); + function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, NULL, 0); + + vkd3d_spirv_build_op_function(builder, void_id, + builder->main_function_id, SpvFunctionControlMaskNone, function_type_id); + vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); + builder->main_function_location = vkd3d_spirv_stream_current_location(&builder->function_stream); +} + +/* Release all builder streams, the capability array, the declaration tree and the entry-point interface list. */ static void vkd3d_spirv_builder_free(struct vkd3d_spirv_builder *builder) +{ + vkd3d_spirv_stream_free(&builder->debug_stream); + vkd3d_spirv_stream_free(&builder->annotation_stream); + vkd3d_spirv_stream_free(&builder->global_stream); + vkd3d_spirv_stream_free(&builder->function_stream); + vkd3d_spirv_stream_free(&builder->execution_mode_stream); + + 
vkd3d_spirv_stream_free(&builder->insertion_stream); + + vkd3d_free(builder->capabilities); + + rb_destroy(&builder->declarations, vkd3d_spirv_declaration_free, NULL); + + vkd3d_free(builder->iface); +} + +/* Assemble the final SPIR-V module into spirv: header words, capabilities (both the bit mask and the explicit list), required extensions derived from enabled capabilities, ext-inst imports, memory model, entry point, execution modes, then the accumulated debug/annotation/global/function streams. Returns false on allocation failure. */ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, + struct vkd3d_shader_code *spirv, const char *entry_point) +{ + uint64_t capability_mask = builder->capability_mask; + struct vkd3d_spirv_stream stream; + uint32_t *code; + unsigned int i; + size_t size; + + vkd3d_spirv_stream_init(&stream); + + vkd3d_spirv_build_word(&stream, SpvMagicNumber); + vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_VERSION); + vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_GENERATOR_MAGIC); + vkd3d_spirv_build_word(&stream, builder->current_id); /* bound */ + vkd3d_spirv_build_word(&stream, 0); /* schema, reserved */ + + /* capabilities */ + for (i = 0; capability_mask; ++i) + { + if (capability_mask & 1) + vkd3d_spirv_build_op_capability(&stream, i); + capability_mask >>= 1; + } + for (i = 0; i < builder->capabilities_count; ++i) + vkd3d_spirv_build_op_capability(&stream, builder->capabilities[i]); + + /* extensions */ + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityDrawParameters)) + vkd3d_spirv_build_op_extension(&stream, "SPV_KHR_shader_draw_parameters"); + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityDemoteToHelperInvocationEXT)) + vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_demote_to_helper_invocation"); + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityRuntimeDescriptorArrayEXT) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityUniformBufferArrayDynamicIndexing) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilitySampledImageArrayDynamicIndexing) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStorageBufferArrayDynamicIndexing) + || vkd3d_spirv_capability_is_enabled(builder, 
SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStorageImageArrayDynamicIndexing) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderNonUniformEXT)) + vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_descriptor_indexing"); + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStencilExportEXT)) + vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_shader_stencil_export"); + + if (builder->ext_instr_set_glsl_450) + vkd3d_spirv_build_op_ext_inst_import(&stream, builder->ext_instr_set_glsl_450, "GLSL.std.450"); + + /* entry point declarations */ + vkd3d_spirv_build_op_memory_model(&stream, SpvAddressingModelLogical, SpvMemoryModelGLSL450); + vkd3d_spirv_build_op_entry_point(&stream, builder->execution_model, builder->main_function_id, + entry_point, builder->iface, builder->iface_element_count); + + /* execution mode declarations */ + if (builder->invocation_count) + vkd3d_spirv_build_op_execution_mode(&builder->execution_mode_stream, + builder->main_function_id, SpvExecutionModeInvocations, &builder->invocation_count, 1); + vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream); + + vkd3d_spirv_stream_append(&stream, &builder->debug_stream); + vkd3d_spirv_stream_append(&stream, &builder->annotation_stream); + vkd3d_spirv_stream_append(&stream, &builder->global_stream); + vkd3d_spirv_stream_append(&stream, &builder->function_stream); + + if (!(code = vkd3d_calloc(stream.word_count, sizeof(*code)))) + { + vkd3d_spirv_stream_free(&stream); + return false; + } + + size = stream.word_count * sizeof(*code); + memcpy(code, stream.words, size); + vkd3d_spirv_stream_free(&stream); + + spirv->code = code; + spirv->size = size; + + return true; +} + +/* Table mapping vkd3d resource types to SPIR-V image dimensionality, arrayed/multisampled flags, coordinate/offset component counts, and the capabilities required for sampled (SRV) and storage (UAV) use. */ static const struct vkd3d_spirv_resource_type +{ + enum vkd3d_shader_resource_type resource_type; + + SpvDim dim; + uint32_t arrayed; + uint32_t ms; + + unsigned int coordinate_component_count; + unsigned int 
offset_component_count; + + SpvCapability capability; + SpvCapability uav_capability; +} +vkd3d_spirv_resource_type_table[] = +{ + {VKD3D_SHADER_RESOURCE_BUFFER, SpvDimBuffer, 0, 0, 1, 0, + SpvCapabilitySampledBuffer, SpvCapabilityImageBuffer}, + {VKD3D_SHADER_RESOURCE_TEXTURE_1D, SpvDim1D, 0, 0, 1, 1, + SpvCapabilitySampled1D, SpvCapabilityImage1D}, + {VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, SpvDim2D, 0, 1, 2, 2}, + {VKD3D_SHADER_RESOURCE_TEXTURE_2D, SpvDim2D, 0, 0, 2, 2}, + {VKD3D_SHADER_RESOURCE_TEXTURE_3D, SpvDim3D, 0, 0, 3, 3}, + {VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3, 0}, + {VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, SpvDim1D, 1, 0, 2, 1, + SpvCapabilitySampled1D, SpvCapabilityImage1D}, + {VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, SpvDim2D, 1, 0, 3, 2}, + {VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D, 1, 1, 3, 2}, + {VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, 0, + SpvCapabilitySampledCubeArray, SpvCapabilityImageCubeArray}, +}; + +/* Linear lookup of the table entry for a resource type; returns NULL (with a FIXME) for unhandled types. */ static const struct vkd3d_spirv_resource_type *vkd3d_get_spirv_resource_type( + enum vkd3d_shader_resource_type resource_type) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(vkd3d_spirv_resource_type_table); ++i) + { + const struct vkd3d_spirv_resource_type* current = &vkd3d_spirv_resource_type_table[i]; + + if (current->resource_type == resource_type) + return current; + } + + FIXME("Unhandled resource type %#x.\n", resource_type); + return NULL; +} + +/* Key types for the symbol table below; each identifies one kind of symbol. */ struct vkd3d_symbol_register +{ + enum vkd3d_shader_register_type type; + unsigned int idx; +}; + +struct vkd3d_symbol_resource +{ + enum vkd3d_shader_register_type type; + unsigned int idx; +}; + +struct vkd3d_symbol_sampler +{ + unsigned int id; +}; + +struct vkd3d_symbol_combined_sampler +{ + enum vkd3d_shader_register_type resource_type; + unsigned int resource_id; + unsigned int sampler_space; + unsigned int sampler_index; +}; + +struct vkd3d_symbol_descriptor_array +{ + uint32_t ptr_type_id; + unsigned int set; + unsigned int binding; + 
unsigned int push_constant_index; +}; + +/* Payload for a register symbol: storage class, component layout and aggregate/indexing flags. */ struct vkd3d_symbol_register_data +{ + SpvStorageClass storage_class; + uint32_t member_idx; + enum vkd3d_shader_component_type component_type; + unsigned int write_mask; + uint32_t dcl_mask; + unsigned int structure_stride; + unsigned int binding_base_idx; + bool is_aggregate; /* An aggregate, i.e. a structure or an array. */ + bool is_dynamically_indexed; /* If member_idx is a variable ID instead of a constant. */ +}; + +/* Payload for a resource symbol, including its optional UAV counter. */ struct vkd3d_symbol_resource_data +{ + struct vkd3d_shader_register_range range; + enum vkd3d_shader_component_type sampled_type; + uint32_t type_id; + const struct vkd3d_spirv_resource_type *resource_type_info; + unsigned int structure_stride; + bool raw; + unsigned int binding_base_idx; + uint32_t uav_counter_id; + const struct vkd3d_symbol *uav_counter_array; + unsigned int uav_counter_base_idx; +}; + +struct vkd3d_symbol_sampler_data +{ + struct vkd3d_shader_register_range range; +}; + +/* Resolved base index and optional push-constant offset index for a binding. */ struct vkd3d_descriptor_binding_address +{ + unsigned int binding_base_idx; + unsigned int push_constant_index; +}; + +struct vkd3d_symbol_descriptor_array_data +{ + SpvStorageClass storage_class; + uint32_t contained_type_id; +}; + +/* A symbol-table entry: a tagged key union identifying the symbol, the SPIR-V id it maps to, and a per-kind payload. */ struct vkd3d_symbol +{ + struct rb_entry entry; + + enum + { + VKD3D_SYMBOL_REGISTER, + VKD3D_SYMBOL_RESOURCE, + VKD3D_SYMBOL_SAMPLER, + VKD3D_SYMBOL_COMBINED_SAMPLER, + VKD3D_SYMBOL_DESCRIPTOR_ARRAY, + } type; + + union + { + struct vkd3d_symbol_register reg; + struct vkd3d_symbol_resource resource; + struct vkd3d_symbol_sampler sampler; + struct vkd3d_symbol_combined_sampler combined_sampler; + struct vkd3d_symbol_descriptor_array descriptor_array; + } key; + + uint32_t id; + /* The array declaration which this symbol maps to, or NULL. 
*/ + const struct vkd3d_symbol *descriptor_array; + + union + { + struct vkd3d_symbol_register_data reg; + struct vkd3d_symbol_resource_data resource; + struct vkd3d_symbol_sampler_data sampler; + struct vkd3d_symbol_descriptor_array_data descriptor_array; + } info; +}; + +/* Order symbols by type tag, then by raw key bytes; keys are zero-initialised by the make_* helpers, so memcmp() is safe. */ static int vkd3d_symbol_compare(const void *key, const struct rb_entry *entry) +{ + const struct vkd3d_symbol *a = key; + const struct vkd3d_symbol *b = RB_ENTRY_VALUE(entry, const struct vkd3d_symbol, entry); + int ret; + + if ((ret = vkd3d_u32_compare(a->type, b->type))) + return ret; + return memcmp(&a->key, &b->key, sizeof(a->key)); +} + +/* rb_tree destroy callback: free one symbol entry. */ static void vkd3d_symbol_free(struct rb_entry *entry, void *context) +{ + struct vkd3d_symbol *s = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + + vkd3d_free(s); +} + +/* Build a register key; input registers keyed by idx[1] when present, immediate constant buffers carry no index. */ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, + const struct vkd3d_shader_register *reg) +{ + symbol->type = VKD3D_SYMBOL_REGISTER; + memset(&symbol->key, 0, sizeof(symbol->key)); + symbol->key.reg.type = reg->type; + if (vkd3d_shader_register_is_input(reg) && reg->idx[1].offset != ~0u) + symbol->key.reg.idx = reg->idx[1].offset; + else if (reg->type != VKD3DSPR_IMMCONSTBUFFER) + symbol->key.reg.idx = reg->idx[0].offset; +} + +/* Fill the register payload with defaults for a plain (non-aggregate, statically indexed) register. */ static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, + uint32_t val_id, SpvStorageClass storage_class, + enum vkd3d_shader_component_type component_type, DWORD write_mask) +{ + symbol->id = val_id; + symbol->descriptor_array = NULL; + symbol->info.reg.storage_class = storage_class; + symbol->info.reg.member_idx = 0; + symbol->info.reg.component_type = component_type; + symbol->info.reg.write_mask = write_mask; + symbol->info.reg.dcl_mask = 0; + symbol->info.reg.structure_stride = 0; + symbol->info.reg.binding_base_idx = 0; + symbol->info.reg.is_aggregate = false; + symbol->info.reg.is_dynamically_indexed = false; +} + +/* Build a resource key from the register's type and first index. */ static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, + const struct vkd3d_shader_register 
*reg) +{ + symbol->type = VKD3D_SYMBOL_RESOURCE; + memset(&symbol->key, 0, sizeof(symbol->key)); + symbol->key.resource.type = reg->type; + symbol->key.resource.idx = reg->idx[0].offset; +} + +/* Build a sampler key from the register's first index. */ static void vkd3d_symbol_make_sampler(struct vkd3d_symbol *symbol, + const struct vkd3d_shader_register *reg) +{ + symbol->type = VKD3D_SYMBOL_SAMPLER; + memset(&symbol->key, 0, sizeof(symbol->key)); + symbol->key.sampler.id = reg->idx[0].offset; +} + +/* Build a combined resource+sampler key. */ static void vkd3d_symbol_make_combined_sampler(struct vkd3d_symbol *symbol, + const struct vkd3d_shader_register *resource_reg, unsigned int sampler_space, unsigned int sampler_index) +{ + symbol->type = VKD3D_SYMBOL_COMBINED_SAMPLER; + memset(&symbol->key, 0, sizeof(symbol->key)); + symbol->key.combined_sampler.resource_type = resource_reg->type; + symbol->key.combined_sampler.resource_id = resource_reg->idx[0].offset; + symbol->key.combined_sampler.sampler_space = sampler_space; + symbol->key.combined_sampler.sampler_index = sampler_index; +} + +/* Shallow-copy a symbol into heap storage for tree insertion; returns NULL on allocation failure. */ static struct vkd3d_symbol *vkd3d_symbol_dup(const struct vkd3d_symbol *symbol) +{ + struct vkd3d_symbol *s; + + if (!(s = vkd3d_malloc(sizeof(*s)))) + return NULL; + + return memcpy(s, symbol, sizeof(*s)); +} + +/* Human-readable symbol description for trace output. */ static const char *debug_vkd3d_symbol(const struct vkd3d_symbol *symbol) +{ + switch (symbol->type) + { + case VKD3D_SYMBOL_REGISTER: + return vkd3d_dbg_sprintf("register %#x, %u", + symbol->key.reg.type, symbol->key.reg.idx); + case VKD3D_SYMBOL_RESOURCE: + return vkd3d_dbg_sprintf("resource %#x, %u", + symbol->key.resource.type, symbol->key.resource.idx); + case VKD3D_SYMBOL_SAMPLER: + return vkd3d_dbg_sprintf("sampler %u", + symbol->key.sampler.id); + default: + return vkd3d_dbg_sprintf("type %#x", symbol->type); + } +} + +/* State for an open if construct: location of the OpSelectionMerge in the stream plus merge/else block ids. */ struct vkd3d_if_cf_info +{ + size_t stream_location; + unsigned int id; + uint32_t merge_block_id; + uint32_t else_block_id; +}; + +/* State for an open loop construct. */ struct vkd3d_loop_cf_info +{ + uint32_t header_block_id; + uint32_t continue_block_id; + uint32_t merge_block_id; +}; + 
+/* State for an open switch construct, including the growable list of collected case blocks. */ struct vkd3d_switch_cf_info +{ + size_t stream_location; + unsigned int id; + uint32_t selector_id; + uint32_t merge_block_id; + uint32_t default_block_id; + uint32_t *case_blocks; + size_t case_blocks_size; + unsigned int case_block_count; +}; + +/* One entry of the control-flow stack: which construct is open and its per-kind state. */ struct vkd3d_control_flow_info +{ + union + { + struct vkd3d_if_cf_info if_; + struct vkd3d_loop_cf_info loop; + struct vkd3d_switch_cf_info switch_; + } u; + + enum + { + VKD3D_BLOCK_IF, + VKD3D_BLOCK_LOOP, + VKD3D_BLOCK_SWITCH, + } current_block; + bool inside_block; +}; + +/* A constant buffer mapped to push constants: the shader register, the interface binding, and the used size. */ struct vkd3d_push_constant_buffer_binding +{ + struct vkd3d_shader_register reg; + struct vkd3d_shader_push_constant_buffer pc; + unsigned int size; +}; + +/* One hull-shader phase (control point / fork / join) and its emitted function ids. */ struct vkd3d_shader_phase +{ + enum vkd3d_shader_opcode type; + unsigned int idx; + unsigned int instance_count; + uint32_t function_id; + uint32_t instance_id; + size_t function_location; +}; + +/* Mapping from a shader parameter name to its specialisation-constant id. */ struct vkd3d_shader_spec_constant +{ + enum vkd3d_shader_parameter_name name; + uint32_t id; +}; + +/* Variable ids shared between hull-shader phases. */ struct vkd3d_hull_shader_variables +{ + uint32_t tess_level_outer_id; + uint32_t tess_level_inner_id; + uint32_t patch_constants_id; +}; + +/* Top-level DXBC to SPIR-V compiler state: the SPIR-V builder, symbol table, control-flow stack, shader interface/binding information, signatures, and per-phase bookkeeping. */ struct vkd3d_dxbc_compiler +{ + struct vkd3d_spirv_builder spirv_builder; + + struct vkd3d_shader_message_context *message_context; + const struct vkd3d_shader_location *location; + bool failed; + + bool strip_debug; + bool ssbo_uavs; + + struct rb_tree symbol_table; + uint32_t temp_id; + unsigned int temp_count; + struct vkd3d_hull_shader_variables hs; + uint32_t sample_positions_id; + + enum vkd3d_shader_type shader_type; + + unsigned int branch_id; + unsigned int loop_id; + unsigned int switch_id; + unsigned int control_flow_depth; + struct vkd3d_control_flow_info *control_flow_info; + size_t control_flow_info_size; + + struct vkd3d_shader_interface_info shader_interface; + struct vkd3d_shader_descriptor_offset_info offset_info; + uint32_t descriptor_offsets_member_id; + uint32_t push_constants_var_id; + uint32_t *descriptor_offset_ids; + struct 
vkd3d_push_constant_buffer_binding *push_constants; + const struct vkd3d_shader_spirv_target_info *spirv_target_info; + + bool after_declarations_section; + const struct vkd3d_shader_signature *input_signature; + const struct vkd3d_shader_signature *output_signature; + const struct vkd3d_shader_signature *patch_constant_signature; + const struct vkd3d_shader_transform_feedback_info *xfb_info; + struct vkd3d_shader_output_info + { + uint32_t id; + enum vkd3d_shader_component_type component_type; + uint32_t array_element_mask; + } *output_info; + uint32_t private_output_variable[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ + uint32_t private_output_variable_array_idx[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ + uint32_t private_output_variable_write_mask[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ + uint32_t epilogue_function_id; + + uint32_t binding_idx; + + const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + unsigned int input_control_point_count; + unsigned int output_control_point_count; + bool use_vocp; + + unsigned int shader_phase_count; + struct vkd3d_shader_phase *shader_phases; + size_t shader_phases_size; + + uint32_t current_spec_constant_id; + unsigned int spec_constant_count; + struct vkd3d_shader_spec_constant *spec_constants; + size_t spec_constants_size; + enum vkd3d_shader_compile_option_formatting_flags formatting; + + struct vkd3d_string_buffer_cache string_buffers; +}; + +/* True if the given phase is the hull-shader control-point phase. */ static bool is_control_point_phase(const struct vkd3d_shader_phase *phase) +{ + return phase && phase->type == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static void vkd3d_dxbc_compiler_emit_initial_declarations(struct vkd3d_dxbc_compiler *compiler); + +/* Entry-point name from the SPIR-V target info, defaulting to "main". */ static const char *vkd3d_dxbc_compiler_get_entry_point_name(const struct vkd3d_dxbc_compiler *compiler) +{ + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + + return info && info->entry_point ? 
info->entry_point : "main"; +} + +/* Allocate and initialise a compiler instance: validate the target environment, size the output-info array for the larger of the output and patch-constant signatures, apply compile options, copy the shader interface (push-constant buffers, descriptor offsets), and emit the initial declarations. Returns NULL on failure. */ struct vkd3d_dxbc_compiler *vkd3d_dxbc_compiler_create(const struct vkd3d_shader_version *shader_version, + const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) +{ + const struct vkd3d_shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; + const struct vkd3d_shader_signature *output_signature = &shader_desc->output_signature; + const struct vkd3d_shader_interface_info *shader_interface; + const struct vkd3d_shader_descriptor_offset_info *offset_info; + const struct vkd3d_shader_spirv_target_info *target_info; + struct vkd3d_dxbc_compiler *compiler; + unsigned int max_element_count; + unsigned int i; + + if (!(compiler = vkd3d_malloc(sizeof(*compiler)))) + return NULL; + + memset(compiler, 0, sizeof(*compiler)); + compiler->message_context = message_context; + compiler->location = location; + + if ((target_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO))) + { + switch (target_info->environment) + { + case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: + case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: + break; + default: + WARN("Invalid target environment %#x.\n", target_info->environment); + vkd3d_free(compiler); + return NULL; + } + + compiler->spirv_target_info = target_info; + } + + max_element_count = max(output_signature->element_count, patch_constant_signature->element_count); + if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) + { + vkd3d_free(compiler); + return NULL; + } + + vkd3d_spirv_builder_init(&compiler->spirv_builder, vkd3d_dxbc_compiler_get_entry_point_name(compiler)); + + compiler->formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT + | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; + + /* Apply caller-supplied compile options; unrecognised options or values are warned about and ignored. */ for (i = 0; i < 
compile_info->option_count; ++i) + { + const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; + + switch (option->name) + { + case VKD3D_SHADER_COMPILE_OPTION_STRIP_DEBUG: + compiler->strip_debug = !!option->value; + break; + + case VKD3D_SHADER_COMPILE_OPTION_BUFFER_UAV: + if (option->value == VKD3D_SHADER_COMPILE_OPTION_BUFFER_UAV_STORAGE_TEXEL_BUFFER) + compiler->ssbo_uavs = false; + else if (option->value == VKD3D_SHADER_COMPILE_OPTION_BUFFER_UAV_STORAGE_BUFFER) + compiler->ssbo_uavs = true; + else + WARN("Ignoring unrecognised value %#x for option %#x.\n", option->value, option->name); + break; + + case VKD3D_SHADER_COMPILE_OPTION_FORMATTING: + compiler->formatting = option->value; + break; + + default: + WARN("Ignoring unrecognised option %#x with value %#x.\n", option->name, option->value); + break; + } + } + + rb_init(&compiler->symbol_table, vkd3d_symbol_compare); + + compiler->shader_type = shader_version->type; + + compiler->input_signature = &shader_desc->input_signature; + compiler->output_signature = &shader_desc->output_signature; + compiler->patch_constant_signature = &shader_desc->patch_constant_signature; + + if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) + { + compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); + + compiler->shader_interface = *shader_interface; + if (shader_interface->push_constant_buffer_count) + { + if (!(compiler->push_constants = vkd3d_calloc(shader_interface->push_constant_buffer_count, + sizeof(*compiler->push_constants)))) + { + vkd3d_dxbc_compiler_destroy(compiler); + return NULL; + } + for (i = 0; i < shader_interface->push_constant_buffer_count; ++i) + compiler->push_constants[i].pc = shader_interface->push_constant_buffers[i]; + } + + if ((offset_info = vkd3d_find_struct(shader_interface->next, DESCRIPTOR_OFFSET_INFO))) + { + compiler->offset_info = *offset_info; + if (compiler->offset_info.descriptor_table_count && 
!(compiler->descriptor_offset_ids = vkd3d_calloc( + compiler->offset_info.descriptor_table_count, sizeof(*compiler->descriptor_offset_ids)))) + { + vkd3d_dxbc_compiler_destroy(compiler); + return NULL; + } + } + } + + compiler->scan_descriptor_info = scan_descriptor_info; + + vkd3d_string_buffer_cache_init(&compiler->string_buffers); + + vkd3d_dxbc_compiler_emit_initial_declarations(compiler); + + return compiler; +} + +/* True when a buffer UAV should be emitted as an SSBO rather than a storage texel buffer. */ static bool vkd3d_dxbc_compiler_use_storage_buffer(const struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_symbol_resource_data *resource) +{ + return compiler->ssbo_uavs && resource->resource_type_info->resource_type == VKD3D_SHADER_RESOURCE_BUFFER; +} + +/* Target environment from the SPIR-V target info, defaulting to Vulkan 1.0. */ static enum vkd3d_shader_spirv_environment vkd3d_dxbc_compiler_get_target_environment( + const struct vkd3d_dxbc_compiler *compiler) +{ + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + + return info ? info->environment : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; +} + +static bool vkd3d_dxbc_compiler_is_opengl_target(const struct vkd3d_dxbc_compiler *compiler) +{ + return vkd3d_dxbc_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5; +} + +/* True when the caller declared support for the given SPIR-V extension in the target info. */ static bool vkd3d_dxbc_compiler_is_target_extension_supported(const struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_spirv_extension extension) +{ + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + unsigned int i; + + for (i = 0; info && i < info->extension_count; ++i) + { + if (info->extensions[i] == extension) + return true; + } + + return false; +} + +/* True when a binding's shader visibility applies to the shader stage being compiled. */ static bool vkd3d_dxbc_compiler_check_shader_visibility(const struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_visibility visibility) +{ + switch (visibility) + { + case VKD3D_SHADER_VISIBILITY_ALL: + return true; + case VKD3D_SHADER_VISIBILITY_VERTEX: + return compiler->shader_type == VKD3D_SHADER_TYPE_VERTEX; + case VKD3D_SHADER_VISIBILITY_HULL: + return compiler->shader_type == 
VKD3D_SHADER_TYPE_HULL; + case VKD3D_SHADER_VISIBILITY_DOMAIN: + return compiler->shader_type == VKD3D_SHADER_TYPE_DOMAIN; + case VKD3D_SHADER_VISIBILITY_GEOMETRY: + return compiler->shader_type == VKD3D_SHADER_TYPE_GEOMETRY; + case VKD3D_SHADER_VISIBILITY_PIXEL: + return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL; + case VKD3D_SHADER_VISIBILITY_COMPUTE: + return compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE; + default: + ERR("Invalid shader visibility %#x.\n", visibility); + return false; + } +} + +/* Find the push-constant buffer matching a constant buffer's space and register index, if any; only single-register ranges are eligible. */ static struct vkd3d_push_constant_buffer_binding *vkd3d_dxbc_compiler_find_push_constant_buffer( + const struct vkd3d_dxbc_compiler *compiler, const struct vkd3d_shader_constant_buffer *cb) +{ + unsigned int register_space = cb->range.space; + unsigned int reg_idx = cb->range.first; + unsigned int i; + + if (cb->range.first != cb->range.last) + return NULL; + + for (i = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) + { + struct vkd3d_push_constant_buffer_binding *current = &compiler->push_constants[i]; + + if (!vkd3d_dxbc_compiler_check_shader_visibility(compiler, current->pc.shader_visibility)) + continue; + + if (current->pc.register_space == register_space && current->pc.register_index == reg_idx) + return current; + } + + return NULL; +} + +/* True when the shader interface declares a combined resource/sampler matching the given resource and/or sampler; UAVs and multi-register ranges never combine. */ static bool vkd3d_dxbc_compiler_has_combined_sampler(const struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_resource *resource, const struct vkd3d_shader_sampler *sampler) +{ + const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; + const struct vkd3d_shader_combined_resource_sampler *combined_sampler; + unsigned int i; + + if (!shader_interface->combined_sampler_count) + return false; + + if (resource && (resource->reg.reg.type == VKD3DSPR_UAV || resource->range.last != resource->range.first)) + return false; + + if (sampler && sampler->range.first != sampler->range.last) + return false; + + for (i = 0; i < 
shader_interface->combined_sampler_count; ++i) + { + combined_sampler = &shader_interface->combined_samplers[i]; + + if (!vkd3d_dxbc_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) + continue; + + if ((!resource || (combined_sampler->resource_space == resource->range.space + && combined_sampler->resource_index == resource->range.first)) + && (!sampler || (combined_sampler->sampler_space == sampler->range.space + && combined_sampler->sampler_index == sampler->range.first))) + return true; + } + + return false; +} + +/* Record a compile error in the message context and mark the compiler as failed. */ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_dxbc_compiler_error(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_error error, const char *format, ...) +{ + va_list args; + + va_start(args, format); + vkd3d_shader_verror(compiler->message_context, compiler->location, error, format, args); + va_end(args); + compiler->failed = true; +} + +/* Format a register range as "[first:last]", or "[first:*]" for unbounded ranges, into a cached string buffer; returns NULL on allocation failure. */ static struct vkd3d_string_buffer *vkd3d_shader_register_range_string(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register_range *range) +{ + struct vkd3d_string_buffer *buffer = vkd3d_string_buffer_get(&compiler->string_buffers); + + if (!buffer) + return NULL; + + if (range->last != ~0u) + vkd3d_string_buffer_printf(buffer, "[%u:%u]", range->first, range->last); + else + vkd3d_string_buffer_printf(buffer, "[%u:*]", range->first); + + return buffer; +} + +/* Resolve a register range to its Vulkan descriptor binding from the shader interface, matching descriptor type, register space/index range, and buffer/image flag; also fills binding_address with the base index and optional push-constant offset index. UAV counters are looked up in the separate uav_counters table. */ static struct vkd3d_shader_descriptor_binding vkd3d_dxbc_compiler_get_descriptor_binding( + struct vkd3d_dxbc_compiler *compiler, const struct vkd3d_shader_register *reg, + const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, + bool is_uav_counter, struct vkd3d_descriptor_binding_address *binding_address) +{ + const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; + unsigned int register_last = (range->last == ~0u) ? 
range->first : range->last; + const struct vkd3d_shader_descriptor_offset *binding_offsets; + enum vkd3d_shader_descriptor_type descriptor_type; + enum vkd3d_shader_binding_flag resource_type_flag; + struct vkd3d_shader_descriptor_binding binding; + unsigned int i; + + if (reg->type == VKD3DSPR_CONSTBUFFER) + descriptor_type = VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + else if (reg->type == VKD3DSPR_RESOURCE) + descriptor_type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; + else if (reg->type == VKD3DSPR_UAV) + descriptor_type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + else if (reg->type == VKD3DSPR_SAMPLER) + descriptor_type = VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; + else + { + FIXME("Unhandled register type %#x.\n", reg->type); + vkd3d_dxbc_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE, + "Encountered invalid/unhandled register type %#x.", reg->type); + goto done; + } + + resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER + ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + + if (is_uav_counter) + { + assert(descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV); + binding_offsets = compiler->offset_info.uav_counter_offsets; + for (i = 0; i < shader_interface->uav_counter_count; ++i) + { + const struct vkd3d_shader_uav_counter_binding *current = &shader_interface->uav_counters[i]; + + if (!vkd3d_dxbc_compiler_check_shader_visibility(compiler, current->shader_visibility)) + continue; + + if (current->register_space != range->space || current->register_index > range->first + || current->binding.count <= register_last - current->register_index) + continue; + + if (current->offset) + { + FIXME("Atomic counter offsets are not supported yet.\n"); + vkd3d_dxbc_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_DESCRIPTOR_BINDING, + "Descriptor binding for UAV counter %u, space %u has unsupported ‘offset’ %u.", + range->first, range->space, current->offset); + } + + binding_address->binding_base_idx = current->register_index + - 
(binding_offsets ? binding_offsets[i].static_offset : 0); + binding_address->push_constant_index = binding_offsets ? binding_offsets[i].dynamic_offset_index : ~0u; + return current->binding; + } + if (shader_interface->uav_counter_count) + { + FIXME("Could not find descriptor binding for UAV counter %u, space %u.\n", range->first, range->space); + vkd3d_dxbc_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND, + "Could not find descriptor binding for UAV counter %u, space %u.", range->first, range->space); + } + } + else + { + binding_offsets = compiler->offset_info.binding_offsets; + for (i = 0; i < shader_interface->binding_count; ++i) + { + const struct vkd3d_shader_resource_binding *current = &shader_interface->bindings[i]; + + if (!(current->flags & resource_type_flag)) + continue; + + if (!vkd3d_dxbc_compiler_check_shader_visibility(compiler, current->shader_visibility)) + continue; + + if (current->type != descriptor_type || current->register_space != range->space + || current->register_index > range->first + || current->binding.count <= register_last - current->register_index) + continue; + + binding_address->binding_base_idx = current->register_index + - (binding_offsets ? binding_offsets[i].static_offset : 0); + binding_address->push_constant_index = binding_offsets ? binding_offsets[i].dynamic_offset_index : ~0u; + return current->binding; + } + if (shader_interface->binding_count) + { + struct vkd3d_string_buffer *buffer = vkd3d_shader_register_range_string(compiler, range); + const char *range_str = buffer ? 
buffer->buffer : ""; + FIXME("Could not find descriptor binding for type %#x, space %u, registers %s, shader type %#x.\n", + descriptor_type, range->space, range_str, compiler->shader_type); + vkd3d_dxbc_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND, + "Could not find descriptor binding for type %#x, space %u, registers %s, shader type %#x.", + descriptor_type, range->space, range_str, compiler->shader_type); + vkd3d_string_buffer_release(&compiler->string_buffers, buffer); + } + } + +done: + binding_address->binding_base_idx = range->first; + binding_address->push_constant_index = ~0u; + binding.set = 0; + binding.count = 1; + binding.binding = compiler->binding_idx++; + return binding; +} + +static void vkd3d_dxbc_compiler_emit_descriptor_binding(struct vkd3d_dxbc_compiler *compiler, + uint32_t variable_id, const struct vkd3d_shader_descriptor_binding *binding) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + vkd3d_spirv_build_op_decorate1(builder, variable_id, SpvDecorationDescriptorSet, binding->set); + vkd3d_spirv_build_op_decorate1(builder, variable_id, SpvDecorationBinding, binding->binding); +} + +static void vkd3d_dxbc_compiler_decorate_nonuniform(struct vkd3d_dxbc_compiler *compiler, + uint32_t expression_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + vkd3d_spirv_enable_capability(builder, SpvCapabilityShaderNonUniformEXT); + vkd3d_spirv_build_op_decorate(builder, expression_id, SpvDecorationNonUniformEXT, NULL, 0); +} + +static const struct vkd3d_symbol *vkd3d_dxbc_compiler_put_symbol(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_symbol *symbol) +{ + struct vkd3d_symbol *s; + + s = vkd3d_symbol_dup(symbol); + if (rb_put(&compiler->symbol_table, s, &s->entry) == -1) + { + ERR("Failed to insert symbol entry (%s).\n", debug_vkd3d_symbol(symbol)); + vkd3d_free(s); + return NULL; + } + return s; +} + +static uint32_t vkd3d_dxbc_compiler_get_constant(struct 
vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_component_type component_type, unsigned int component_count, const uint32_t *values) +{ + uint32_t type_id, scalar_type_id, component_ids[VKD3D_VEC4_SIZE]; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int i; + + assert(0 < component_count && component_count <= VKD3D_VEC4_SIZE); + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + + switch (component_type) + { + case VKD3D_SHADER_COMPONENT_UINT: + case VKD3D_SHADER_COMPONENT_INT: + case VKD3D_SHADER_COMPONENT_FLOAT: + break; + default: + FIXME("Unhandled component_type %#x.\n", component_type); + return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); + } + + if (component_count == 1) + { + return vkd3d_spirv_get_op_constant(builder, type_id, *values); + } + else + { + scalar_type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + for (i = 0; i < component_count; ++i) + component_ids[i] = vkd3d_spirv_get_op_constant(builder, scalar_type_id, values[i]); + return vkd3d_spirv_get_op_constant_composite(builder, type_id, component_ids, component_count); + } +} + +static uint32_t vkd3d_dxbc_compiler_get_constant64(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_component_type component_type, unsigned int component_count, const uint64_t *values) +{ + uint32_t type_id, scalar_type_id, component_ids[VKD3D_DVEC2_SIZE]; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int i; + + assert(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + + if (component_type != VKD3D_SHADER_COMPONENT_DOUBLE) + { + FIXME("Unhandled component_type %#x.\n", component_type); + return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); + } + + if (component_count == 1) + { + return vkd3d_spirv_get_op_constant64(builder, type_id, *values); + } + else + { + 
scalar_type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + for (i = 0; i < component_count; ++i) + component_ids[i] = vkd3d_spirv_get_op_constant64(builder, scalar_type_id, values[i]); + return vkd3d_spirv_get_op_constant_composite(builder, type_id, component_ids, component_count); + } +} + +static uint32_t vkd3d_dxbc_compiler_get_constant_uint(struct vkd3d_dxbc_compiler *compiler, + uint32_t value) +{ + return vkd3d_dxbc_compiler_get_constant(compiler, VKD3D_SHADER_COMPONENT_UINT, 1, &value); +} + +static uint32_t vkd3d_dxbc_compiler_get_constant_float(struct vkd3d_dxbc_compiler *compiler, + float value) +{ + return vkd3d_dxbc_compiler_get_constant(compiler, VKD3D_SHADER_COMPONENT_FLOAT, 1, (uint32_t *)&value); +} + +static uint32_t vkd3d_dxbc_compiler_get_constant_vector(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_component_type component_type, unsigned int component_count, uint32_t value) +{ + const uint32_t values[] = {value, value, value, value}; + return vkd3d_dxbc_compiler_get_constant(compiler, component_type, component_count, values); +} + +static uint32_t vkd3d_dxbc_compiler_get_constant_uint_vector(struct vkd3d_dxbc_compiler *compiler, + uint32_t value, unsigned int component_count) +{ + return vkd3d_dxbc_compiler_get_constant_vector(compiler, VKD3D_SHADER_COMPONENT_UINT, component_count, value); +} + +static uint32_t vkd3d_dxbc_compiler_get_constant_float_vector(struct vkd3d_dxbc_compiler *compiler, + float value, unsigned int component_count) +{ + const float values[] = {value, value, value, value}; + return vkd3d_dxbc_compiler_get_constant(compiler, VKD3D_SHADER_COMPONENT_FLOAT, + component_count, (const uint32_t *)values); +} + +static uint32_t vkd3d_dxbc_compiler_get_constant_double_vector(struct vkd3d_dxbc_compiler *compiler, + double value, unsigned int component_count) +{ + const double values[] = {value, value}; + return vkd3d_dxbc_compiler_get_constant64(compiler, VKD3D_SHADER_COMPONENT_DOUBLE, + component_count, 
(const uint64_t *)values); +} + +static uint32_t vkd3d_dxbc_compiler_get_type_id_for_reg(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD write_mask) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + return vkd3d_spirv_get_type_id(builder, + vkd3d_component_type_from_data_type(reg->data_type), + vkd3d_write_mask_component_count(write_mask)); +} + +static uint32_t vkd3d_dxbc_compiler_get_type_id_for_dst(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_dst_param *dst) +{ + return vkd3d_dxbc_compiler_get_type_id_for_reg(compiler, &dst->reg, dst->write_mask); +} + +static bool vkd3d_dxbc_compiler_get_register_name(char *buffer, unsigned int buffer_size, + const struct vkd3d_shader_register *reg) +{ + unsigned int idx; + + idx = reg->idx[1].offset != ~0u ? reg->idx[1].offset : reg->idx[0].offset; + switch (reg->type) + { + case VKD3DSPR_RESOURCE: + snprintf(buffer, buffer_size, "t%u", reg->idx[0].offset); + break; + case VKD3DSPR_UAV: + snprintf(buffer, buffer_size, "u%u", reg->idx[0].offset); + break; + case VKD3DSPR_SAMPLER: + snprintf(buffer, buffer_size, "s%u", reg->idx[0].offset); + break; + case VKD3DSPR_CONSTBUFFER: + snprintf(buffer, buffer_size, "cb%u_%u", reg->idx[0].offset, reg->idx[1].offset); + break; + case VKD3DSPR_INPUT: + snprintf(buffer, buffer_size, "v%u", idx); + break; + case VKD3DSPR_INCONTROLPOINT: + snprintf(buffer, buffer_size, "vicp%u", idx); + break; + case VKD3DSPR_OUTPUT: + case VKD3DSPR_COLOROUT: + snprintf(buffer, buffer_size, "o%u", idx); + break; + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + snprintf(buffer, buffer_size, "oDepth"); + break; + case VKD3DSPR_FORKINSTID: + snprintf(buffer, buffer_size, "vForkInstanceId"); + break; + case VKD3DSPR_JOININSTID: + snprintf(buffer, buffer_size, "vJoinInstanceId"); + break; + case VKD3DSPR_GSINSTID: + snprintf(buffer, buffer_size, "vGSInstanceID"); + break; + case 
VKD3DSPR_PATCHCONST: + snprintf(buffer, buffer_size, "vpc%u", idx); + break; + case VKD3DSPR_TESSCOORD: + snprintf(buffer, buffer_size, "vDomainLocation"); + break; + case VKD3DSPR_THREADID: + snprintf(buffer, buffer_size, "vThreadID"); + break; + case VKD3DSPR_LOCALTHREADID: + snprintf(buffer, buffer_size, "vThreadIDInGroup"); + break; + case VKD3DSPR_LOCALTHREADINDEX: + snprintf(buffer, buffer_size, "vThreadIDInGroupFlattened"); + break; + case VKD3DSPR_THREADGROUPID: + snprintf(buffer, buffer_size, "vThreadGroupID"); + break; + case VKD3DSPR_GROUPSHAREDMEM: + snprintf(buffer, buffer_size, "g%u", reg->idx[0].offset); + break; + case VKD3DSPR_IDXTEMP: + snprintf(buffer, buffer_size, "x%u", idx); + break; + case VKD3DSPR_COVERAGE: + snprintf(buffer, buffer_size, "vCoverage"); + break; + case VKD3DSPR_SAMPLEMASK: + snprintf(buffer, buffer_size, "oMask"); + break; + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_PRIMID: + /* SPIRV-Tools disassembler generates names for SPIR-V built-ins. */ + return false; + case VKD3DSPR_OUTSTENCILREF: + snprintf(buffer, buffer_size, "oStencilRef"); + break; + default: + FIXME("Unhandled register %#x.\n", reg->type); + snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type); + return false; + } + + return true; +} + +/* TODO: UAV counters: vkd3d_spirv_build_op_name(builder, counter_var_id, "u%u_counter", reg->idx[0].offset); */ +static void vkd3d_dxbc_compiler_emit_register_debug_name(struct vkd3d_spirv_builder *builder, + uint32_t id, const struct vkd3d_shader_register *reg) +{ + char debug_name[256]; + if (vkd3d_dxbc_compiler_get_register_name(debug_name, ARRAY_SIZE(debug_name), reg)) + vkd3d_spirv_build_op_name(builder, id, "%s", debug_name); +} + +static uint32_t vkd3d_dxbc_compiler_emit_variable(struct vkd3d_dxbc_compiler *compiler, + struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, + enum vkd3d_shader_component_type component_type, unsigned int component_count) +{ + struct vkd3d_spirv_builder *builder = 
&compiler->spirv_builder; + uint32_t type_id, ptr_type_id; + + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); + return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); +} + +static uint32_t vkd3d_dxbc_compiler_emit_array_variable(struct vkd3d_dxbc_compiler *compiler, + struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, + enum vkd3d_shader_component_type component_type, unsigned int component_count, unsigned int array_length) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, length_id, ptr_type_id; + + if (!array_length) + return vkd3d_dxbc_compiler_emit_variable(compiler, + stream, storage_class, component_type, component_count); + + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, array_length); + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); + return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); +} + +static const struct vkd3d_shader_parameter *vkd3d_dxbc_compiler_get_shader_parameter( + struct vkd3d_dxbc_compiler *compiler, enum vkd3d_shader_parameter_name name) +{ + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + unsigned int i; + + for (i = 0; info && i < info->parameter_count; ++i) + { + if (info->parameters[i].name == name) + return &info->parameters[i]; + } + + return NULL; +} + +static const struct vkd3d_spec_constant_info +{ + enum vkd3d_shader_parameter_name name; + uint32_t default_value; + const char *debug_name; +} +vkd3d_shader_parameters[] = +{ + {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, +}; + +static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum 
vkd3d_shader_parameter_name name) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(vkd3d_shader_parameters); ++i) + { + if (vkd3d_shader_parameters[i].name == name) + return &vkd3d_shader_parameters[i]; + } + + FIXME("Unhandled parameter name %#x.\n", name); + return NULL; +} + +static uint32_t vkd3d_dxbc_compiler_alloc_spec_constant_id(struct vkd3d_dxbc_compiler *compiler) +{ + if (!compiler->current_spec_constant_id) + { + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + unsigned int i, id = 0; + + for (i = 0; info && i < info->parameter_count; ++i) + { + const struct vkd3d_shader_parameter *current = &info->parameters[i]; + + if (current->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) + id = max(current->u.specialization_constant.id + 1, id); + } + + compiler->current_spec_constant_id = id; + } + + return compiler->current_spec_constant_id++; +} + +static uint32_t vkd3d_dxbc_compiler_emit_spec_constant(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_parameter_name name, uint32_t spec_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_spec_constant_info *info; + uint32_t type_id, id, default_value; + + info = get_spec_constant_info(name); + default_value = info ? 
info->default_value : 0; + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + id = vkd3d_spirv_build_op_spec_constant(builder, type_id, default_value); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationSpecId, spec_id); + + if (info) + vkd3d_spirv_build_op_name(builder, id, "%s", info->debug_name); + + if (vkd3d_array_reserve((void **)&compiler->spec_constants, &compiler->spec_constants_size, + compiler->spec_constant_count + 1, sizeof(*compiler->spec_constants))) + { + struct vkd3d_shader_spec_constant *constant = &compiler->spec_constants[compiler->spec_constant_count++]; + constant->name = name; + constant->id = id; + } + + return id; +} + +static uint32_t vkd3d_dxbc_compiler_get_spec_constant(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_parameter_name name, uint32_t spec_id) +{ + unsigned int i; + + for (i = 0; i < compiler->spec_constant_count; ++i) + { + if (compiler->spec_constants[i].name == name) + return compiler->spec_constants[i].id; + } + + return vkd3d_dxbc_compiler_emit_spec_constant(compiler, name, spec_id); +} + +static uint32_t vkd3d_dxbc_compiler_emit_uint_shader_parameter(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_parameter_name name) +{ + const struct vkd3d_shader_parameter *parameter; + + if (!(parameter = vkd3d_dxbc_compiler_get_shader_parameter(compiler, name))) + { + WARN("Unresolved shader parameter %#x.\n", name); + goto default_parameter; + } + + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + return vkd3d_dxbc_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) + return vkd3d_dxbc_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id); + + FIXME("Unhandled parameter type %#x.\n", parameter->type); + +default_parameter: + return vkd3d_dxbc_compiler_get_spec_constant(compiler, + name, 
vkd3d_dxbc_compiler_alloc_spec_constant_id(compiler)); +} + +static uint32_t vkd3d_dxbc_compiler_emit_construct_vector(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_component_type component_type, unsigned int component_count, + uint32_t val_id, unsigned int val_component_idx, unsigned int val_component_count) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t components[VKD3D_VEC4_SIZE]; + uint32_t type_id, result_id; + unsigned int i; + + assert(val_component_idx < val_component_count); + + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + if (val_component_count == 1) + { + for (i = 0; i < component_count; ++i) + components[i] = val_id; + result_id = vkd3d_spirv_build_op_composite_construct(builder, + type_id, components, component_count); + } + else + { + for (i = 0; i < component_count; ++i) + components[i] = val_component_idx; + result_id = vkd3d_spirv_build_op_vector_shuffle(builder, + type_id, val_id, val_id, components, component_count); + } + return result_id; +} + +static uint32_t vkd3d_dxbc_compiler_emit_load_src(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_src_param *src, DWORD write_mask); + +static uint32_t vkd3d_dxbc_compiler_emit_register_addressing(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register_index *reg_index) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, addr_id; + + if (!reg_index->rel_addr) + return vkd3d_dxbc_compiler_get_constant_uint(compiler, reg_index->offset); + + addr_id = vkd3d_dxbc_compiler_emit_load_src(compiler, reg_index->rel_addr, VKD3DSP_WRITEMASK_0); + if (reg_index->offset) + { + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + addr_id = vkd3d_spirv_build_op_iadd(builder, type_id, + addr_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, reg_index->offset)); + } + return addr_id; +} + +struct vkd3d_shader_register_info +{ + uint32_t id; + 
const struct vkd3d_symbol *descriptor_array; + SpvStorageClass storage_class; + enum vkd3d_shader_component_type component_type; + unsigned int write_mask; + uint32_t member_idx; + unsigned int structure_stride; + unsigned int binding_base_idx; + bool is_aggregate; + bool is_dynamically_indexed; +}; + +static bool vkd3d_dxbc_compiler_get_register_info(const struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_info *register_info) +{ + struct vkd3d_symbol reg_symbol, *symbol; + struct rb_entry *entry; + + assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); + + if (reg->type == VKD3DSPR_TEMP) + { + assert(reg->idx[0].offset < compiler->temp_count); + register_info->id = compiler->temp_id + reg->idx[0].offset; + register_info->storage_class = SpvStorageClassFunction; + register_info->descriptor_array = NULL; + register_info->member_idx = 0; + register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + register_info->write_mask = VKD3DSP_WRITEMASK_ALL; + register_info->structure_stride = 0; + register_info->binding_base_idx = 0; + register_info->is_aggregate = false; + register_info->is_dynamically_indexed = false; + return true; + } + + vkd3d_symbol_make_register(®_symbol, reg); + if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) + { + FIXME("Unrecognized register (%s).\n", debug_vkd3d_symbol(®_symbol)); + memset(register_info, 0, sizeof(*register_info)); + return false; + } + + symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + register_info->id = symbol->id; + register_info->descriptor_array = symbol->descriptor_array; + register_info->storage_class = symbol->info.reg.storage_class; + register_info->member_idx = symbol->info.reg.member_idx; + register_info->component_type = symbol->info.reg.component_type; + register_info->write_mask = symbol->info.reg.write_mask; + register_info->structure_stride = symbol->info.reg.structure_stride; + 
register_info->binding_base_idx = symbol->info.reg.binding_base_idx; + register_info->is_aggregate = symbol->info.reg.is_aggregate; + register_info->is_dynamically_indexed = symbol->info.reg.is_dynamically_indexed; + + return true; +} + +static bool register_is_descriptor(const struct vkd3d_shader_register *reg) +{ + switch (reg->type) + { + case VKD3DSPR_SAMPLER: + case VKD3DSPR_RESOURCE: + case VKD3DSPR_CONSTBUFFER: + case VKD3DSPR_UAV: + return true; + + default: + return false; + } +} + +static bool vkd3d_dxbc_compiler_enable_descriptor_indexing(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_register_type reg_type, enum vkd3d_shader_resource_type resource_type) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + if (!vkd3d_dxbc_compiler_is_target_extension_supported(compiler, + VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING)) + return false; + + switch (reg_type) + { + case VKD3DSPR_CONSTBUFFER: + vkd3d_spirv_enable_capability(builder, SpvCapabilityUniformBufferArrayDynamicIndexing); + break; + case VKD3DSPR_RESOURCE: + vkd3d_spirv_enable_capability(builder, resource_type == VKD3D_SHADER_RESOURCE_BUFFER + ? SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT + : SpvCapabilitySampledImageArrayDynamicIndexing); + break; + case VKD3DSPR_UAV: + if (resource_type == VKD3D_SHADER_RESOURCE_BUFFER) + vkd3d_spirv_enable_capability(builder, compiler->ssbo_uavs + ? 
SpvCapabilityStorageBufferArrayDynamicIndexing + : SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT); + else + vkd3d_spirv_enable_capability(builder, SpvCapabilityStorageImageArrayDynamicIndexing); + break; + case VKD3DSPR_SAMPLER: + break; + default: + ERR("Unhandled register type %#x.\n", reg_type); + break; + } + + return true; +} + +static uint32_t vkd3d_dxbc_compiler_get_descriptor_index(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, const struct vkd3d_symbol *array_symbol, + unsigned int binding_base_idx, enum vkd3d_shader_resource_type resource_type) +{ + const struct vkd3d_symbol_descriptor_array *array_key = &array_symbol->key.descriptor_array; + struct vkd3d_shader_register_index index = reg->idx[1]; + unsigned int push_constant_index; + uint32_t index_id; + + if ((push_constant_index = array_key->push_constant_index) != ~0u || index.rel_addr) + { + if (!vkd3d_dxbc_compiler_enable_descriptor_indexing(compiler, reg->type, resource_type)) + { + FIXME("The target environment does not support descriptor indexing.\n"); + vkd3d_dxbc_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED, + "Cannot dynamically index a descriptor array of type %#x, id %u. 
" + "The target environment does not support descriptor indexing.", reg->type, reg->idx[0].offset); + } + } + + index.offset -= binding_base_idx; + index_id = vkd3d_dxbc_compiler_emit_register_addressing(compiler, &index); + + if (push_constant_index != ~0u) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, ptr_type_id, ptr_id, offset_id, index_ids[2]; + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + if (!(offset_id = compiler->descriptor_offset_ids[push_constant_index])) + { + index_ids[0] = compiler->descriptor_offsets_member_id; + index_ids[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, push_constant_index); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPushConstant, type_id); + ptr_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, ptr_type_id, + compiler->push_constants_var_id, index_ids, 2); + offset_id = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); + if (!compiler->control_flow_depth) + compiler->descriptor_offset_ids[push_constant_index] = offset_id; + } + index_id = vkd3d_spirv_build_op_iadd(builder, type_id, index_id, offset_id); + } + + return index_id; +} + +static void vkd3d_dxbc_compiler_emit_dereference_register(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_info *register_info) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int component_count, index_count = 0; + uint32_t type_id, ptr_type_id; + uint32_t indexes[3]; + + if (reg->type == VKD3DSPR_CONSTBUFFER) + { + assert(!reg->idx[0].rel_addr); + if (register_info->descriptor_array) + indexes[index_count++] = vkd3d_dxbc_compiler_get_descriptor_index(compiler, reg, + register_info->descriptor_array, register_info->binding_base_idx, VKD3D_SHADER_RESOURCE_BUFFER); + indexes[index_count++] = vkd3d_dxbc_compiler_get_constant_uint(compiler, register_info->member_idx); 
+ indexes[index_count++] = vkd3d_dxbc_compiler_emit_register_addressing(compiler, ®->idx[2]); + } + else if (reg->type == VKD3DSPR_IMMCONSTBUFFER) + { + indexes[index_count++] = vkd3d_dxbc_compiler_emit_register_addressing(compiler, ®->idx[0]); + } + else if (reg->type == VKD3DSPR_IDXTEMP) + { + indexes[index_count++] = vkd3d_dxbc_compiler_emit_register_addressing(compiler, ®->idx[1]); + } + else if (register_info->is_aggregate) + { + if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_INCONTROLPOINT) + { + /* Indices for these are swapped compared to the generated SPIR-V. */ + if (reg->idx[1].offset != ~0u) + indexes[index_count++] = vkd3d_dxbc_compiler_emit_register_addressing(compiler, ®->idx[1]); + if (reg->idx[0].offset != ~0u) + indexes[index_count++] = vkd3d_dxbc_compiler_emit_register_addressing(compiler, ®->idx[0]); + } + else + { + struct vkd3d_shader_register_index reg_idx = reg->idx[0]; + + if (reg->idx[1].rel_addr) + FIXME("Relative addressing not implemented.\n"); + + if (register_info->is_dynamically_indexed) + { + indexes[index_count++] = vkd3d_spirv_build_op_load(builder, + vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1), + register_info->member_idx, SpvMemoryAccessMaskNone); + } + else + { + reg_idx.offset = register_info->member_idx; + indexes[index_count++] = vkd3d_dxbc_compiler_emit_register_addressing(compiler, ®_idx); + } + } + } + else + { + if (reg->idx[1].rel_addr || (reg->idx[1].offset == ~0u && reg->idx[0].rel_addr)) + FIXME("Relative addressing not implemented.\n"); + + /* Handle arrayed registers, e.g. v[3][0]. 
*/ + if (reg->idx[1].offset != ~0u && !register_is_descriptor(reg)) + indexes[index_count++] = vkd3d_dxbc_compiler_emit_register_addressing(compiler, ®->idx[0]); + } + + if (index_count) + { + component_count = vkd3d_write_mask_component_count(register_info->write_mask); + type_id = vkd3d_spirv_get_type_id(builder, register_info->component_type, component_count); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, register_info->storage_class, type_id); + register_info->id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, + register_info->id, indexes, index_count); + if (reg->non_uniform) + vkd3d_dxbc_compiler_decorate_nonuniform(compiler, register_info->id); + } +} + +static uint32_t vkd3d_dxbc_compiler_get_register_id(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_shader_register_info register_info; + + if (vkd3d_dxbc_compiler_get_register_info(compiler, reg, ®ister_info)) + { + vkd3d_dxbc_compiler_emit_dereference_register(compiler, reg, ®ister_info); + return register_info.id; + } + + return vkd3d_dxbc_compiler_emit_variable(compiler, &builder->global_stream, + SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); +} + +static bool vkd3d_swizzle_is_equal(unsigned int dst_write_mask, + unsigned int swizzle, unsigned int write_mask) +{ + return vkd3d_compact_swizzle(VKD3D_SHADER_NO_SWIZZLE, dst_write_mask) == vkd3d_compact_swizzle(swizzle, write_mask); +} + +static uint32_t vkd3d_dxbc_compiler_emit_swizzle(struct vkd3d_dxbc_compiler *compiler, + uint32_t val_id, unsigned int val_write_mask, enum vkd3d_shader_component_type component_type, + unsigned int swizzle, unsigned int write_mask) +{ + unsigned int i, component_idx, component_count, val_component_count; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, components[VKD3D_VEC4_SIZE]; + + component_count = 
vkd3d_write_mask_component_count(write_mask); + val_component_count = vkd3d_write_mask_component_count(val_write_mask); + + if (component_count == val_component_count + && (component_count == 1 || vkd3d_swizzle_is_equal(val_write_mask, swizzle, write_mask))) + return val_id; + + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + + if (component_count == 1) + { + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + component_idx = vkd3d_swizzle_get_component(swizzle, component_idx); + component_idx -= vkd3d_write_mask_get_component_idx(val_write_mask); + return vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx); + } + + if (val_component_count == 1) + { + for (i = 0, component_idx = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) + { + assert(VKD3DSP_WRITEMASK_0 << vkd3d_swizzle_get_component(swizzle, i) == val_write_mask); + components[component_idx++] = val_id; + } + } + return vkd3d_spirv_build_op_composite_construct(builder, type_id, components, component_count); + } + + for (i = 0, component_idx = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) + components[component_idx++] = vkd3d_swizzle_get_component(swizzle, i); + } + return vkd3d_spirv_build_op_vector_shuffle(builder, + type_id, val_id, val_id, components, component_count); +} + +static uint32_t vkd3d_dxbc_compiler_emit_vector_shuffle(struct vkd3d_dxbc_compiler *compiler, + uint32_t vector1_id, uint32_t vector2_id, unsigned int swizzle, unsigned int write_mask, + enum vkd3d_shader_component_type component_type, unsigned int component_count) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t components[VKD3D_VEC4_SIZE]; + uint32_t type_id; + unsigned int i; + + assert(component_count <= ARRAY_SIZE(components)); + + for (i = 0; i < component_count; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) + components[i] = 
vkd3d_swizzle_get_component(swizzle, i); + else + components[i] = VKD3D_VEC4_SIZE + vkd3d_swizzle_get_component(swizzle, i); + } + + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + return vkd3d_spirv_build_op_vector_shuffle(builder, + type_id, vector1_id, vector2_id, components, component_count); +} + +static uint32_t vkd3d_dxbc_compiler_emit_load_constant(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask) +{ + unsigned int component_count = vkd3d_write_mask_component_count(write_mask); + uint32_t values[VKD3D_VEC4_SIZE] = {0}; + unsigned int i, j; + + assert(reg->type == VKD3DSPR_IMMCONST); + + if (reg->immconst_type == VKD3D_IMMCONST_SCALAR) + { + for (i = 0; i < component_count; ++i) + values[i] = *reg->u.immconst_uint; + } + else + { + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) + values[j++] = reg->u.immconst_uint[vkd3d_swizzle_get_component(swizzle, i)]; + } + } + + return vkd3d_dxbc_compiler_get_constant(compiler, + vkd3d_component_type_from_data_type(reg->data_type), component_count, values); +} + +static uint32_t vkd3d_dxbc_compiler_emit_load_constant64(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask) +{ + unsigned int component_count = vkd3d_write_mask_component_count(write_mask); + uint64_t values[VKD3D_DVEC2_SIZE] = {0}; + unsigned int i, j; + + assert(reg->type == VKD3DSPR_IMMCONST64); + + if (reg->immconst_type == VKD3D_IMMCONST_SCALAR) + { + for (i = 0; i < component_count; ++i) + values[i] = *reg->u.immconst_uint64; + } + else + { + for (i = 0, j = 0; i < VKD3D_DVEC2_SIZE; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) + values[j++] = reg->u.immconst_uint64[vkd3d_swizzle_get_component64(swizzle, i)]; + } + } + + return vkd3d_dxbc_compiler_get_constant64(compiler, + vkd3d_component_type_from_data_type(reg->data_type), component_count, 
values); +} + +static uint32_t vkd3d_dxbc_compiler_emit_load_scalar(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask, + const struct vkd3d_shader_register_info *reg_info) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, ptr_type_id, index, reg_id, val_id; + unsigned int component_idx, reg_component_count; + enum vkd3d_shader_component_type component_type; + unsigned int skipped_component_mask; + + assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); + assert(vkd3d_write_mask_component_count(write_mask) == 1); + + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + component_idx = vkd3d_swizzle_get_component(swizzle, component_idx); + skipped_component_mask = ~reg_info->write_mask & ((VKD3DSP_WRITEMASK_0 << component_idx) - 1); + if (skipped_component_mask) + component_idx -= vkd3d_write_mask_component_count(skipped_component_mask); + component_type = vkd3d_component_type_from_data_type(reg->data_type); + + reg_component_count = vkd3d_write_mask_component_count(reg_info->write_mask); + + if (component_idx >= vkd3d_write_mask_component_count(reg_info->write_mask)) + { + ERR("Invalid component_idx %u for register %#x, %u (write_mask %#x).\n", + component_idx, reg->type, reg->idx[0].offset, reg_info->write_mask); + } + + type_id = vkd3d_spirv_get_type_id(builder, reg_info->component_type, 1); + reg_id = reg_info->id; + if (reg_component_count != 1) + { + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info->storage_class, type_id); + index = vkd3d_dxbc_compiler_get_constant_uint(compiler, component_idx); + reg_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, reg_id, index); + } + + val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_id, SpvMemoryAccessMaskNone); + + if (component_type != reg_info->component_type) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + val_id = 
vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + } + + return val_id; +} + +static uint32_t vkd3d_dxbc_compiler_emit_load_reg(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + enum vkd3d_shader_component_type component_type; + struct vkd3d_shader_register_info reg_info; + unsigned int component_count; + unsigned int write_mask32; + uint32_t type_id, val_id; + + if (reg->type == VKD3DSPR_IMMCONST) + return vkd3d_dxbc_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); + else if (reg->type == VKD3DSPR_IMMCONST64) + return vkd3d_dxbc_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); + + component_count = vkd3d_write_mask_component_count(write_mask); + component_type = vkd3d_component_type_from_data_type(reg->data_type); + if (!vkd3d_dxbc_compiler_get_register_info(compiler, reg, ®_info)) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); + } + assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); + vkd3d_dxbc_compiler_emit_dereference_register(compiler, reg, ®_info); + + write_mask32 = (reg->data_type == VKD3D_DATA_DOUBLE) ? vkd3d_write_mask_32_from_64(write_mask) : write_mask; + + /* Intermediate value (no storage class). 
*/ + if (reg_info.storage_class == SpvStorageClassMax) + { + val_id = reg_info.id; + } + else if (vkd3d_write_mask_component_count(write_mask32) == 1) + { + return vkd3d_dxbc_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, ®_info); + } + else + { + type_id = vkd3d_spirv_get_type_id(builder, + reg_info.component_type, vkd3d_write_mask_component_count(reg_info.write_mask)); + val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_info.id, SpvMemoryAccessMaskNone); + } + + val_id = vkd3d_dxbc_compiler_emit_swizzle(compiler, + val_id, reg_info.write_mask, reg_info.component_type, swizzle, write_mask32); + + if (component_type != reg_info.component_type) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + } + + return val_id; +} + +static void vkd3d_dxbc_compiler_emit_execution_mode(struct vkd3d_dxbc_compiler *compiler, + SpvExecutionMode mode, const uint32_t *literals, unsigned int literal_count) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + vkd3d_spirv_build_op_execution_mode(&builder->execution_mode_stream, + builder->main_function_id, mode, literals, literal_count); +} + +static void vkd3d_dxbc_compiler_emit_execution_mode1(struct vkd3d_dxbc_compiler *compiler, + SpvExecutionMode mode, const uint32_t literal) +{ + vkd3d_dxbc_compiler_emit_execution_mode(compiler, mode, &literal, 1); +} + +static uint32_t vkd3d_dxbc_compiler_emit_abs(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD write_mask, uint32_t val_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id; + + type_id = vkd3d_dxbc_compiler_get_type_id_for_reg(compiler, reg, write_mask); + if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id); + + FIXME("Unhandled data type %#x.\n", reg->data_type); + 
return val_id; +} + +static uint32_t vkd3d_dxbc_compiler_emit_neg(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD write_mask, uint32_t val_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id; + + type_id = vkd3d_dxbc_compiler_get_type_id_for_reg(compiler, reg, write_mask); + if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); + else if (reg->data_type == VKD3D_DATA_INT) + return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); + + FIXME("Unhandled data type %#x.\n", reg->data_type); + return val_id; +} + +static uint32_t vkd3d_dxbc_compiler_emit_src_modifier(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD write_mask, + enum vkd3d_shader_src_modifier modifier, uint32_t val_id) +{ + switch (modifier) + { + case VKD3DSPSM_NONE: + break; + case VKD3DSPSM_NEG: + return vkd3d_dxbc_compiler_emit_neg(compiler, reg, write_mask, val_id); + case VKD3DSPSM_ABS: + return vkd3d_dxbc_compiler_emit_abs(compiler, reg, write_mask, val_id); + case VKD3DSPSM_ABSNEG: + val_id = vkd3d_dxbc_compiler_emit_abs(compiler, reg, write_mask, val_id); + return vkd3d_dxbc_compiler_emit_neg(compiler, reg, write_mask, val_id); + default: + FIXME("Unhandled src modifier %#x.\n", modifier); + break; + } + + return val_id; +} + +static uint32_t vkd3d_dxbc_compiler_emit_load_src(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_src_param *src, DWORD write_mask) +{ + uint32_t val_id; + + val_id = vkd3d_dxbc_compiler_emit_load_reg(compiler, &src->reg, src->swizzle, write_mask); + return vkd3d_dxbc_compiler_emit_src_modifier(compiler, &src->reg, write_mask, src->modifiers, val_id); +} + +static uint32_t vkd3d_dxbc_compiler_emit_load_src_with_type(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_src_param *src, DWORD write_mask, enum vkd3d_shader_component_type 
component_type) +{ + struct vkd3d_shader_src_param src_param = *src; + + src_param.reg.data_type = vkd3d_data_type_from_component_type(component_type); + return vkd3d_dxbc_compiler_emit_load_src(compiler, &src_param, write_mask); +} + +static void vkd3d_dxbc_compiler_emit_store_scalar(struct vkd3d_dxbc_compiler *compiler, + uint32_t dst_id, unsigned int dst_write_mask, enum vkd3d_shader_component_type component_type, + SpvStorageClass storage_class, unsigned int write_mask, uint32_t val_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, ptr_type_id, index; + unsigned int component_idx; + + if (vkd3d_write_mask_component_count(dst_write_mask) > 1) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + component_idx -= vkd3d_write_mask_get_component_idx(dst_write_mask); + index = vkd3d_dxbc_compiler_get_constant_uint(compiler, component_idx); + dst_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, dst_id, index); + } + + vkd3d_spirv_build_op_store(builder, dst_id, val_id, SpvMemoryAccessMaskNone); +} + +static void vkd3d_dxbc_compiler_emit_store(struct vkd3d_dxbc_compiler *compiler, + uint32_t dst_id, unsigned int dst_write_mask, enum vkd3d_shader_component_type component_type, + SpvStorageClass storage_class, unsigned int write_mask, uint32_t val_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int component_count, dst_component_count; + uint32_t components[VKD3D_VEC4_SIZE]; + unsigned int i, src_idx, dst_idx; + uint32_t type_id, dst_val_id; + + assert(write_mask); + + component_count = vkd3d_write_mask_component_count(write_mask); + dst_component_count = vkd3d_write_mask_component_count(dst_write_mask); + + if (dst_component_count == 1 && component_count != 1) + { + type_id = 
vkd3d_spirv_get_type_id(builder, component_type, 1); + val_id = vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, + vkd3d_write_mask_get_component_idx(dst_write_mask)); + write_mask &= dst_write_mask; + component_count = 1; + } + + if (component_count == 1) + { + return vkd3d_dxbc_compiler_emit_store_scalar(compiler, + dst_id, dst_write_mask, component_type, storage_class, write_mask, val_id); + } + + if (dst_component_count != component_count) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, dst_component_count); + dst_val_id = vkd3d_spirv_build_op_load(builder, type_id, dst_id, SpvMemoryAccessMaskNone); + + assert(component_count <= ARRAY_SIZE(components)); + + for (i = 0, src_idx = 0, dst_idx = 0; dst_idx < VKD3D_VEC4_SIZE; ++dst_idx) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << dst_idx)) + components[i] = dst_component_count + src_idx++; + else + components[i] = i; + + if (dst_write_mask & (VKD3DSP_WRITEMASK_0 << dst_idx)) + ++i; + } + + val_id = vkd3d_spirv_build_op_vector_shuffle(builder, + type_id, dst_val_id, val_id, components, dst_component_count); + } + + vkd3d_spirv_build_op_store(builder, dst_id, val_id, SpvMemoryAccessMaskNone); +} + +static void vkd3d_dxbc_compiler_emit_store_reg(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, unsigned int write_mask, uint32_t val_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + enum vkd3d_shader_component_type component_type; + struct vkd3d_shader_register_info reg_info; + unsigned int src_write_mask = write_mask; + uint32_t type_id; + + assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); + + if (!vkd3d_dxbc_compiler_get_register_info(compiler, reg, ®_info)) + return; + vkd3d_dxbc_compiler_emit_dereference_register(compiler, reg, ®_info); + + component_type = vkd3d_component_type_from_data_type(reg->data_type); + if (component_type != reg_info.component_type) + { + if (reg->data_type == 
VKD3D_DATA_DOUBLE) + src_write_mask = vkd3d_write_mask_32_from_64(write_mask); + type_id = vkd3d_spirv_get_type_id(builder, reg_info.component_type, + vkd3d_write_mask_component_count(src_write_mask)); + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + component_type = reg_info.component_type; + } + + vkd3d_dxbc_compiler_emit_store(compiler, + reg_info.id, reg_info.write_mask, component_type, reg_info.storage_class, src_write_mask, val_id); +} + +static uint32_t vkd3d_dxbc_compiler_emit_sat(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD write_mask, uint32_t val_id) +{ + unsigned int component_count = vkd3d_write_mask_component_count(write_mask); + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, zero_id, one_id; + + if (reg->data_type == VKD3D_DATA_DOUBLE) + { + zero_id = vkd3d_dxbc_compiler_get_constant_double_vector(compiler, 0.0, component_count); + one_id = vkd3d_dxbc_compiler_get_constant_double_vector(compiler, 1.0, component_count); + } + else + { + zero_id = vkd3d_dxbc_compiler_get_constant_float_vector(compiler, 0.0f, component_count); + one_id = vkd3d_dxbc_compiler_get_constant_float_vector(compiler, 1.0f, component_count); + } + + type_id = vkd3d_dxbc_compiler_get_type_id_for_reg(compiler, reg, write_mask); + if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id); + + FIXME("Unhandled data type %#x.\n", reg->data_type); + return val_id; +} + +static void vkd3d_dxbc_compiler_emit_store_dst(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_dst_param *dst, uint32_t val_id) +{ + assert(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); + if (dst->modifiers & VKD3DSPDM_SATURATE) + val_id = vkd3d_dxbc_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id); + + vkd3d_dxbc_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, 
val_id); +} + +static void vkd3d_dxbc_compiler_emit_store_dst_swizzled(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_dst_param *dst, uint32_t val_id, + enum vkd3d_shader_component_type component_type, DWORD swizzle) +{ + struct vkd3d_shader_dst_param typed_dst = *dst; + val_id = vkd3d_dxbc_compiler_emit_swizzle(compiler, + val_id, VKD3DSP_WRITEMASK_ALL, component_type, swizzle, dst->write_mask); + /* XXX: The register data type could be fixed by the shader parser. For SM5 + * shaders the data types are stored in instructions modifiers. + */ + typed_dst.reg.data_type = vkd3d_data_type_from_component_type(component_type); + vkd3d_dxbc_compiler_emit_store_dst(compiler, &typed_dst, val_id); +} + +static void vkd3d_dxbc_compiler_emit_store_dst_components(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_component_type component_type, + uint32_t *component_ids) +{ + unsigned int component_count = vkd3d_write_mask_component_count(dst->write_mask); + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, val_id; + + if (component_count > 1) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + val_id = vkd3d_spirv_build_op_composite_construct(builder, + type_id, component_ids, component_count); + } + else + { + val_id = *component_ids; + } + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void vkd3d_dxbc_compiler_emit_store_dst_scalar(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_dst_param *dst, uint32_t val_id, + enum vkd3d_shader_component_type component_type, DWORD swizzle) +{ + unsigned int component_count = vkd3d_write_mask_component_count(dst->write_mask); + uint32_t component_ids[VKD3D_VEC4_SIZE]; + unsigned int component_idx, i; + + component_idx = vkd3d_write_mask_get_component_idx(dst->write_mask); + for (i = 0; i < component_count; ++i) + { + if (vkd3d_swizzle_get_component(swizzle, 
component_idx + i)) + ERR("Invalid swizzle %#x for scalar value, write mask %#x.\n", swizzle, dst->write_mask); + + component_ids[i] = val_id; + } + vkd3d_dxbc_compiler_emit_store_dst_components(compiler, dst, component_type, component_ids); +} + +static void vkd3d_dxbc_compiler_decorate_builtin(struct vkd3d_dxbc_compiler *compiler, + uint32_t target_id, SpvBuiltIn builtin) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + switch (builtin) + { + case SpvBuiltInPrimitiveId: + if (compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL) + vkd3d_spirv_enable_capability(builder, SpvCapabilityGeometry); + break; + case SpvBuiltInFragDepth: + vkd3d_dxbc_compiler_emit_execution_mode(compiler, SpvExecutionModeDepthReplacing, NULL, 0); + break; + case SpvBuiltInLayer: + vkd3d_spirv_enable_capability(builder, SpvCapabilityGeometry); + break; + case SpvBuiltInViewportIndex: + vkd3d_spirv_enable_capability(builder, SpvCapabilityMultiViewport); + break; + case SpvBuiltInSampleId: + vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); + break; + case SpvBuiltInClipDistance: + vkd3d_spirv_enable_capability(builder, SpvCapabilityClipDistance); + break; + case SpvBuiltInCullDistance: + vkd3d_spirv_enable_capability(builder, SpvCapabilityCullDistance); + break; + default: + break; + } + + vkd3d_spirv_build_op_decorate1(builder, target_id, SpvDecorationBuiltIn, builtin); +} + +static void vkd3d_dxbc_compiler_emit_interpolation_decorations(struct vkd3d_dxbc_compiler *compiler, + uint32_t id, enum vkd3d_shader_interpolation_mode mode) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + switch (mode) + { + case VKD3DSIM_NONE: + break; + case VKD3DSIM_CONSTANT: + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationFlat, NULL, 0); + break; + case VKD3DSIM_LINEAR: + break; + case VKD3DSIM_LINEAR_CENTROID: + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationCentroid, NULL, 0); + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE: + 
vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationNoPerspective, NULL, 0); + break; + case VKD3DSIM_LINEAR_SAMPLE: + vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); + break; + default: + FIXME("Unhandled interpolation mode %#x.\n", mode); + break; + } +} + +static uint32_t vkd3d_dxbc_compiler_emit_int_to_bool(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_conditional_op condition, unsigned int component_count, uint32_t val_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id; + SpvOp op; + + assert(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z))); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + op = condition & VKD3D_SHADER_CONDITIONAL_OP_Z ? SpvOpIEqual : SpvOpINotEqual; + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, val_id, + vkd3d_dxbc_compiler_get_constant_uint_vector(compiler, 0, component_count)); +} + +static uint32_t vkd3d_dxbc_compiler_emit_bool_to_int(struct vkd3d_dxbc_compiler *compiler, + unsigned int component_count, uint32_t val_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, true_id, false_id; + + true_id = vkd3d_dxbc_compiler_get_constant_uint_vector(compiler, 0xffffffff, component_count); + false_id = vkd3d_dxbc_compiler_get_constant_uint_vector(compiler, 0, component_count); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, component_count); + return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); +} + +typedef uint32_t (*vkd3d_spirv_builtin_fixup_pfn)(struct vkd3d_dxbc_compiler *compiler, + uint32_t val_id); + +static uint32_t vkd3d_dxbc_compiler_emit_draw_parameter_fixup(struct vkd3d_dxbc_compiler *compiler, + uint32_t index_id, SpvBuiltIn base) +{ + struct vkd3d_spirv_builder 
*builder = &compiler->spirv_builder; + uint32_t base_var_id, base_id, type_id; + + vkd3d_spirv_enable_capability(builder, SpvCapabilityDrawParameters); + + base_var_id = vkd3d_dxbc_compiler_emit_variable(compiler, &builder->global_stream, + SpvStorageClassInput, VKD3D_SHADER_COMPONENT_INT, 1); + vkd3d_spirv_add_iface_variable(builder, base_var_id); + vkd3d_dxbc_compiler_decorate_builtin(compiler, base_var_id, base); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1); + base_id = vkd3d_spirv_build_op_load(builder, + type_id, base_var_id, SpvMemoryAccessMaskNone); + + return vkd3d_spirv_build_op_isub(builder, type_id, index_id, base_id); +} + +/* Substitute "VertexIndex - BaseVertex" for SV_VertexID. */ +static uint32_t sv_vertex_id_fixup(struct vkd3d_dxbc_compiler *compiler, + uint32_t vertex_index_id) +{ + return vkd3d_dxbc_compiler_emit_draw_parameter_fixup(compiler, + vertex_index_id, SpvBuiltInBaseVertex); +} + +/* Substitute "InstanceIndex - BaseInstance" for SV_InstanceID. 
*/ +static uint32_t sv_instance_id_fixup(struct vkd3d_dxbc_compiler *compiler, + uint32_t instance_index_id) +{ + return vkd3d_dxbc_compiler_emit_draw_parameter_fixup(compiler, + instance_index_id, SpvBuiltInBaseInstance); +} + +static uint32_t sv_front_face_fixup(struct vkd3d_dxbc_compiler *compiler, + uint32_t front_facing_id) +{ + return vkd3d_dxbc_compiler_emit_bool_to_int(compiler, 1, front_facing_id); +} + +/* frag_coord.w = 1.0f / frag_coord.w */ +static uint32_t frag_coord_fixup(struct vkd3d_dxbc_compiler *compiler, + uint32_t frag_coord_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, w_id; + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 1); + w_id = vkd3d_spirv_build_op_composite_extract1(builder, type_id, frag_coord_id, 3); + w_id = vkd3d_spirv_build_op_fdiv(builder, type_id, + vkd3d_dxbc_compiler_get_constant_float(compiler, 1.0f), w_id); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + return vkd3d_spirv_build_op_composite_insert1(builder, type_id, w_id, frag_coord_id, 3); +} + +struct vkd3d_spirv_builtin +{ + enum vkd3d_shader_component_type component_type; + unsigned int component_count; + SpvBuiltIn spirv_builtin; + vkd3d_spirv_builtin_fixup_pfn fixup_pfn; + unsigned int spirv_array_size; + unsigned int member_idx; +}; + +/* + * The following tables are based on the "14.6. Built-In Variables" section + * from the Vulkan spec. 
+ */ +static const struct +{ + enum vkd3d_shader_input_sysval_semantic sysval; + struct vkd3d_spirv_builtin builtin; + enum vkd3d_shader_spirv_environment environment; +} +vkd3d_system_value_builtins[] = +{ + {VKD3D_SIV_VERTEX_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInVertexId}, + VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5}, + {VKD3D_SIV_INSTANCE_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInstanceId}, + VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5}, + + {VKD3D_SIV_POSITION, {VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInPosition}}, + {VKD3D_SIV_VERTEX_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInVertexIndex, sv_vertex_id_fixup}}, + {VKD3D_SIV_INSTANCE_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInstanceIndex, sv_instance_id_fixup}}, + + {VKD3D_SIV_PRIMITIVE_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, + + {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLayer}}, + {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInViewportIndex}}, + + {VKD3D_SIV_IS_FRONT_FACE, {VKD3D_SHADER_COMPONENT_BOOL, 1, SpvBuiltInFrontFacing, sv_front_face_fixup}}, + + {VKD3D_SIV_SAMPLE_INDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleId}}, + + {VKD3D_SIV_CLIP_DISTANCE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInClipDistance, NULL, 1}}, + {VKD3D_SIV_CULL_DISTANCE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInCullDistance, NULL, 1}}, + + {VKD3D_SIV_QUAD_U0_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 0}}, + {VKD3D_SIV_QUAD_V0_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 1}}, + {VKD3D_SIV_QUAD_U1_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 2}}, + {VKD3D_SIV_QUAD_V1_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 3}}, + {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelInner, NULL, 2, 0}}, + {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, 
{VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelInner, NULL, 2, 1}}, + + {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 0}}, + {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 1}}, + {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 2}}, + {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelInner, NULL, 2, 0}}, + + {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 0}}, + {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 1}}, +}; +static const struct vkd3d_spirv_builtin vkd3d_pixel_shader_position_builtin = +{ + VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInFragCoord, frag_coord_fixup, +}; +static const struct +{ + enum vkd3d_shader_register_type reg_type; + struct vkd3d_spirv_builtin builtin; +} +vkd3d_register_builtins[] = +{ + {VKD3DSPR_THREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, + {VKD3DSPR_LOCALTHREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, + {VKD3DSPR_LOCALTHREADINDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, + {VKD3DSPR_THREADGROUPID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}}, + + {VKD3DSPR_GSINSTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, + {VKD3DSPR_OUTPOINTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, + + {VKD3DSPR_PRIMID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, + + {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, + + {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + + {VKD3DSPR_DEPTHOUT, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + 
{VKD3DSPR_DEPTHOUTGE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + + {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, +}; + +static void vkd3d_dxbc_compiler_emit_register_execution_mode(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg) +{ + switch (reg->type) + { + case VKD3DSPR_DEPTHOUTGE: + vkd3d_dxbc_compiler_emit_execution_mode(compiler, SpvExecutionModeDepthGreater, NULL, 0); + break; + case VKD3DSPR_DEPTHOUTLE: + vkd3d_dxbc_compiler_emit_execution_mode(compiler, SpvExecutionModeDepthLess, NULL, 0); + break; + case VKD3DSPR_OUTSTENCILREF: + if (!vkd3d_dxbc_compiler_is_target_extension_supported(compiler, + VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT)) + { + FIXME("The target environment does not support stencil export.\n"); + vkd3d_dxbc_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_STENCIL_EXPORT_UNSUPPORTED, + "Cannot export stencil reference value for register id %u. " + "The target environment does not support stencil export.", reg->idx[0].offset); + } + vkd3d_spirv_enable_capability(&compiler->spirv_builder, SpvCapabilityStencilExportEXT); + vkd3d_dxbc_compiler_emit_execution_mode(compiler, SpvExecutionModeStencilRefReplacingEXT, NULL, 0); + break; + default: + return; + } +} + +static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_sysval( + const struct vkd3d_dxbc_compiler *compiler, enum vkd3d_shader_input_sysval_semantic sysval) +{ + enum vkd3d_shader_spirv_environment environment; + unsigned int i; + + if (!sysval) + return NULL; + + /* In pixel shaders, SV_Position is mapped to SpvBuiltInFragCoord. 
*/ + if (sysval == VKD3D_SIV_POSITION && compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL) + return &vkd3d_pixel_shader_position_builtin; + + environment = vkd3d_dxbc_compiler_get_target_environment(compiler); + for (i = 0; i < ARRAY_SIZE(vkd3d_system_value_builtins); ++i) + { + if (vkd3d_system_value_builtins[i].sysval == sysval + && (!vkd3d_system_value_builtins[i].environment + || vkd3d_system_value_builtins[i].environment == environment)) + return &vkd3d_system_value_builtins[i].builtin; + } + + FIXME("Unhandled builtin (sysval %#x).\n", sysval); + + return NULL; +} + +static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_register( + enum vkd3d_shader_register_type reg_type) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(vkd3d_register_builtins); ++i) + { + if (vkd3d_register_builtins[i].reg_type == reg_type) + return &vkd3d_register_builtins[i].builtin; + } + + return NULL; +} + +static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_register_type reg_type, enum vkd3d_shader_input_sysval_semantic sysval) +{ + const struct vkd3d_spirv_builtin *builtin; + + if ((builtin = get_spirv_builtin_for_sysval(compiler, sysval))) + return builtin; + if ((builtin = get_spirv_builtin_for_register(reg_type))) + return builtin; + + if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT)) + FIXME("Unhandled builtin (register type %#x, sysval %#x).\n", reg_type, sysval); + return NULL; +} + +static const struct vkd3d_shader_signature_element *vkd3d_find_signature_element_for_reg( + const struct vkd3d_shader_signature *signature, unsigned int *signature_element_index, + unsigned int reg_idx, DWORD write_mask) +{ + unsigned int signature_idx; + + for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) + { + if (signature->elements[signature_idx].register_index == reg_idx + && (signature->elements[signature_idx].mask & 
write_mask) == write_mask) + { + if (signature_element_index) + *signature_element_index = signature_idx; + return &signature->elements[signature_idx]; + } + } + + FIXME("Could not find shader signature element (register %u, write mask %#x).\n", + reg_idx, write_mask); + if (signature_element_index) + *signature_element_index = ~0u; + return NULL; +} + +static uint32_t vkd3d_dxbc_compiler_get_invocation_id(struct vkd3d_dxbc_compiler *compiler) +{ + struct vkd3d_shader_register r; + + assert(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); + + memset(&r, 0, sizeof(r)); + r.type = VKD3DSPR_OUTPOINTID; + r.idx[0].offset = ~0u; + r.idx[1].offset = ~0u; + return vkd3d_dxbc_compiler_get_register_id(compiler, &r); +} + +static uint32_t vkd3d_dxbc_compiler_emit_load_invocation_id(struct vkd3d_dxbc_compiler *compiler) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, id; + + id = vkd3d_dxbc_compiler_get_invocation_id(compiler); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1); + return vkd3d_spirv_build_op_load(builder, type_id, id, SpvMemoryAccessMaskNone); +} + +static void vkd3d_dxbc_compiler_emit_shader_phase_name(struct vkd3d_dxbc_compiler *compiler, + uint32_t id, const struct vkd3d_shader_phase *phase, const char *suffix) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const char *name; + + if (!suffix) + suffix = ""; + + switch (phase->type) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + name = "control"; + break; + case VKD3DSIH_HS_FORK_PHASE: + name = "fork"; + break; + case VKD3DSIH_HS_JOIN_PHASE: + name = "join"; + break; + default: + ERR("Invalid phase type %#x.\n", phase->type); + return; + } + vkd3d_spirv_build_op_name(builder, id, "%s%u%s", name, phase->idx, suffix); +} + +static void vkd3d_dxbc_compiler_begin_shader_phase(struct vkd3d_dxbc_compiler *compiler, + struct vkd3d_shader_phase *phase) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + 
uint32_t void_id, function_type_id; + unsigned int param_count; + uint32_t param_type_id; + + if (phase->instance_count) + { + param_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + param_count = 1; + } + else + { + param_count = 0; + } + + phase->function_id = vkd3d_spirv_alloc_id(builder); + + void_id = vkd3d_spirv_get_op_type_void(builder); + function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, ¶m_type_id, param_count); + vkd3d_spirv_build_op_function(builder, void_id, phase->function_id, + SpvFunctionControlMaskNone, function_type_id); + + if (phase->instance_count) + phase->instance_id = vkd3d_spirv_build_op_function_parameter(builder, param_type_id); + + vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); + phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream); + + vkd3d_dxbc_compiler_emit_shader_phase_name(compiler, phase->function_id, phase, NULL); +} + +static const struct vkd3d_shader_phase *vkd3d_dxbc_compiler_get_current_shader_phase( + struct vkd3d_dxbc_compiler *compiler) +{ + struct vkd3d_shader_phase *phase; + + if (!compiler->shader_phase_count) + return NULL; + + phase = &compiler->shader_phases[compiler->shader_phase_count - 1]; + if (!phase->function_id) + vkd3d_dxbc_compiler_begin_shader_phase(compiler, phase); + return phase; +} + +static void vkd3d_dxbc_compiler_decorate_xfb_output(struct vkd3d_dxbc_compiler *compiler, + uint32_t id, unsigned int component_count, const struct vkd3d_shader_signature_element *signature_element) +{ + const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; + const struct vkd3d_shader_transform_feedback_element *xfb_element; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int offset, stride, i; + + if (!xfb_info) + return; + + offset = 0; + xfb_element = NULL; + for (i = 0; i < xfb_info->element_count; ++i) + { + const struct 
vkd3d_shader_transform_feedback_element *e = &xfb_info->elements[i]; + + if (e->stream_index == signature_element->stream_index + && !ascii_strcasecmp(e->semantic_name, signature_element->semantic_name) + && e->semantic_index == signature_element->semantic_index) + { + xfb_element = e; + break; + } + } + + if (!xfb_element) + return; + + for (i = 0; xfb_element != &xfb_info->elements[i]; ++i) + if (xfb_info->elements[i].output_slot == xfb_element->output_slot) + offset += 4 * xfb_info->elements[i].component_count; + + if (xfb_element->component_index || xfb_element->component_count > component_count) + { + FIXME("Unhandled component range %u, %u.\n", xfb_element->component_index, xfb_element->component_count); + return; + } + + if (xfb_element->output_slot < xfb_info->buffer_stride_count) + { + stride = xfb_info->buffer_strides[xfb_element->output_slot]; + } + else + { + stride = 0; + for (i = 0; i < xfb_info->element_count; ++i) + { + const struct vkd3d_shader_transform_feedback_element *e = &xfb_info->elements[i]; + + if (e->stream_index == xfb_element->stream_index && e->output_slot == xfb_element->output_slot) + stride += 4 * e->component_count; + } + } + + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationXfbBuffer, xfb_element->output_slot); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationXfbStride, stride); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationOffset, offset); +} + +static uint32_t vkd3d_dxbc_compiler_emit_builtin_variable(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t id; + + array_size = max(array_size, builtin->spirv_array_size); + + id = vkd3d_dxbc_compiler_emit_array_variable(compiler, + &builder->global_stream, storage_class, + builtin->component_type, builtin->component_count, array_size); + vkd3d_spirv_add_iface_variable(builder, id); + 
vkd3d_dxbc_compiler_decorate_builtin(compiler, id, builtin->spirv_builtin); + + if (compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && storage_class == SpvStorageClassInput + && builtin->component_type != VKD3D_SHADER_COMPONENT_FLOAT + && builtin->component_type != VKD3D_SHADER_COMPONENT_BOOL) + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationFlat, NULL, 0); + + return id; +} + +static bool needs_private_io_variable(const struct vkd3d_shader_signature *signature, + unsigned int reg_idx, const struct vkd3d_spirv_builtin *builtin, + unsigned int *component_count, unsigned int *out_write_mask) +{ + unsigned int write_mask = 0; + bool have_sysval = false; + unsigned int i, count; + + /* Always use private variables for arrayed builtins. These are generally + * scalars on the D3D side, so would need extra array indices when + * accessing them. It may be feasible to insert those indices at the point + * where the builtins are used, but it's not clear it's worth the effort. */ + if (builtin && (builtin->spirv_array_size || builtin->fixup_pfn)) + return true; + + if (*component_count == VKD3D_VEC4_SIZE) + return false; + + for (i = 0, count = 0; i < signature->element_count; ++i) + { + const struct vkd3d_shader_signature_element *current = &signature->elements[i]; + + if (current->register_index != reg_idx) + continue; + + write_mask |= current->mask; + ++count; + + if (current->sysval_semantic) + have_sysval = true; + } + + if (count == 1) + return false; + + if (builtin || have_sysval) + return true; + + if (!vkd3d_bitmask_is_contiguous(write_mask)) + { + FIXME("Write mask %#x is non-contiguous.\n", write_mask); + return true; + } + + assert(vkd3d_write_mask_component_count(write_mask) >= *component_count); + *component_count = vkd3d_write_mask_component_count(write_mask); + *out_write_mask = write_mask; + return false; +} + +static uint32_t vkd3d_dxbc_compiler_emit_input(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_dst_param *dst, enum 
vkd3d_shader_input_sysval_semantic sysval, + enum vkd3d_shader_interpolation_mode interpolation_mode) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_signature_element *signature_element; + const struct vkd3d_shader_signature *shader_signature; + const struct vkd3d_shader_register *reg = &dst->reg; + unsigned int component_idx, input_component_count; + enum vkd3d_shader_component_type component_type; + uint32_t type_id, ptr_type_id, float_type_id; + const struct vkd3d_spirv_builtin *builtin; + struct vkd3d_symbol *symbol = NULL; + uint32_t val_id, input_id, var_id; + struct vkd3d_symbol reg_symbol; + struct vkd3d_symbol tmp_symbol; + SpvStorageClass storage_class; + struct rb_entry *entry = NULL; + bool use_private_var = false; + unsigned int write_mask; + unsigned int array_size; + unsigned int reg_idx; + uint32_t i, index; + + assert(!reg->idx[0].rel_addr); + assert(!reg->idx[1].rel_addr); + + if (reg->idx[1].offset != ~0u) + { + array_size = reg->idx[0].offset; + reg_idx = reg->idx[1].offset; + } + else + { + array_size = 0; + reg_idx = reg->idx[0].offset; + } + + shader_signature = reg->type == VKD3DSPR_PATCHCONST + ? 
compiler->patch_constant_signature : compiler->input_signature; + + if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, + NULL, reg_idx, dst->write_mask))) + { + FIXME("No signature element for shader input, ignoring shader input.\n"); + return 0; + } + + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !sysval && signature_element->sysval_semantic) + sysval = vkd3d_siv_from_sysval(signature_element->sysval_semantic); + + builtin = get_spirv_builtin_for_sysval(compiler, sysval); + + write_mask = signature_element->mask; + + if (builtin) + { + component_type = builtin->component_type; + input_component_count = builtin->component_count; + component_idx = 0; + } + else + { + component_type = signature_element->component_type; + input_component_count = vkd3d_write_mask_component_count(signature_element->mask); + component_idx = vkd3d_write_mask_get_component_idx(signature_element->mask); + } + + if (needs_private_io_variable(shader_signature, reg_idx, builtin, &input_component_count, &write_mask) + && (compiler->shader_type != VKD3D_SHADER_TYPE_HULL + || (reg->type != VKD3DSPR_INCONTROLPOINT && reg->type != VKD3DSPR_PATCHCONST))) + use_private_var = true; + else + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + + storage_class = SpvStorageClassInput; + + vkd3d_symbol_make_register(®_symbol, reg); + + if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) + { + symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + input_id = symbol->id; + } + else if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL + && (reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST)) + { + /* Input/output registers from one phase can be used as inputs in + * subsequent phases. Specifically: + * + * - Control phase inputs are available as "vicp" in fork and join + * phases. + * - Control phase outputs are available as "vocp" in fork and join + * phases. 
+ * - Fork phase patch constants are available as "vpc" in join + * phases. + * + * We handle "vicp" and "vpc" here by creating aliases to the shader's + * global inputs and outputs. We handle "vocp" in + * vkd3d_dxbc_compiler_leave_shader_phase(). */ + + tmp_symbol = reg_symbol; + if (reg->type == VKD3DSPR_PATCHCONST) + tmp_symbol.key.reg.type = VKD3DSPR_OUTPUT; + else + tmp_symbol.key.reg.type = VKD3DSPR_INPUT; + + if ((entry = rb_get(&compiler->symbol_table, &tmp_symbol))) + { + symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + tmp_symbol = *symbol; + tmp_symbol.key.reg.type = reg->type; + vkd3d_dxbc_compiler_put_symbol(compiler, &tmp_symbol); + + input_id = symbol->id; + } + else + { + if (reg->type == VKD3DSPR_PATCHCONST) + ERR("Patch constant register %u was not declared in a previous phase.\n", reg_idx); + else + ERR("Input control point register %u was not declared in a previous phase.\n", reg_idx); + } + } + + if (!symbol || ~symbol->info.reg.dcl_mask & write_mask) + { + if (builtin) + { + input_id = vkd3d_dxbc_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); + if (reg->type == VKD3DSPR_PATCHCONST) + vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); + } + else + { + unsigned int location = reg_idx; + + input_id = vkd3d_dxbc_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, component_type, input_component_count, array_size); + vkd3d_spirv_add_iface_variable(builder, input_id); + if (reg->type == VKD3DSPR_PATCHCONST) + { + vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); + location += compiler->input_signature->element_count; + } + vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location); + if (component_idx) + vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx); + + vkd3d_dxbc_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode); + } + } + 
+ if (!symbol) + { + var_id = input_id; + if (use_private_var) + { + storage_class = SpvStorageClassPrivate; + var_id = vkd3d_dxbc_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_size); + } + + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_var ? VKD3DSP_WRITEMASK_ALL : write_mask); + reg_symbol.info.reg.dcl_mask |= write_mask; + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); + + vkd3d_dxbc_compiler_emit_register_debug_name(builder, var_id, reg); + } + else + { + symbol->info.reg.dcl_mask |= write_mask; + } + + if (use_private_var) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, input_component_count); + for (i = 0; i < max(array_size, 1); ++i) + { + struct vkd3d_shader_register dst_reg = *reg; + dst_reg.data_type = VKD3D_DATA_FLOAT; + + val_id = input_id; + if (array_size) + { + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); + index = vkd3d_dxbc_compiler_get_constant_uint(compiler, i); + val_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, input_id, index); + dst_reg.idx[0].offset = i; + } + else if (builtin && builtin->spirv_array_size) + { + /* The D3D builtin is not an array, but the SPIR-V builtin is, + * so we'll need to index into the SPIR-V builtin when loading + * it. This happens when reading TessLevel in domain shaders. 
*/ + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); + index = vkd3d_dxbc_compiler_get_constant_uint(compiler, builtin->member_idx); + val_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, input_id, index); + dst_reg.idx[0].offset = reg_idx + i; + } + val_id = vkd3d_spirv_build_op_load(builder, type_id, val_id, SpvMemoryAccessMaskNone); + + if (builtin && builtin->fixup_pfn) + val_id = builtin->fixup_pfn(compiler, val_id); + + if (component_type != VKD3D_SHADER_COMPONENT_FLOAT) + { + float_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, input_component_count); + val_id = vkd3d_spirv_build_op_bitcast(builder, float_type_id, val_id); + } + + val_id = vkd3d_dxbc_compiler_emit_swizzle(compiler, val_id, + vkd3d_write_mask_from_component_count(input_component_count), + VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_SHADER_NO_SWIZZLE, dst->write_mask >> component_idx); + + vkd3d_dxbc_compiler_emit_store_reg(compiler, &dst_reg, dst->write_mask, val_id); + } + } + + return input_id; +} + +static void vkd3d_dxbc_compiler_emit_input_register(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_dst_param *dst) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_register *reg = &dst->reg; + const struct vkd3d_spirv_builtin *builtin; + struct vkd3d_symbol reg_symbol; + struct rb_entry *entry; + uint32_t write_mask; + uint32_t input_id; + + assert(!reg->idx[0].rel_addr); + assert(!reg->idx[1].rel_addr); + assert(reg->idx[1].offset == ~0u); + + if (!(builtin = get_spirv_builtin_for_register(reg->type))) + { + FIXME("Unhandled register %#x.\n", reg->type); + return; + } + + /* vPrim may be declared in multiple hull shader phases. 
*/ + vkd3d_symbol_make_register(®_symbol, reg); + if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) + return; + + input_id = vkd3d_dxbc_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassInput, 0); + + write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); + vkd3d_symbol_set_register_info(®_symbol, input_id, + SpvStorageClassInput, builtin->component_type, write_mask); + reg_symbol.info.reg.dcl_mask = write_mask; + reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); + vkd3d_dxbc_compiler_emit_register_debug_name(builder, input_id, reg); +} + +static void vkd3d_dxbc_compiler_emit_shader_phase_input(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_phase *phase, const struct vkd3d_shader_dst_param *dst) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_register *reg = &dst->reg; + struct vkd3d_symbol reg_symbol; + uint32_t val_id; + + switch (reg->type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_PATCHCONST: + vkd3d_dxbc_compiler_emit_input(compiler, dst, VKD3D_SIV_NONE, VKD3DSIM_NONE); + return; + case VKD3DSPR_PRIMID: + vkd3d_dxbc_compiler_emit_input_register(compiler, dst); + return; + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_JOININSTID: + val_id = phase->instance_id; + break; + case VKD3DSPR_OUTPOINTID: /* Emitted in vkd3d_dxbc_compiler_emit_initial_declarations(). */ + case VKD3DSPR_OUTCONTROLPOINT: /* See vkd3d_dxbc_compiler_leave_shader_phase(). 
*/ + return; + default: + FIXME("Unhandled shader phase input register %#x.\n", reg->type); + return; + } + + vkd3d_symbol_make_register(®_symbol, reg); + vkd3d_symbol_set_register_info(®_symbol, val_id, + SpvStorageClassMax /* Intermediate value */, + VKD3D_SHADER_COMPONENT_UINT, VKD3DSP_WRITEMASK_0); + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); + vkd3d_dxbc_compiler_emit_register_debug_name(builder, val_id, reg); +} + +static unsigned int vkd3d_dxbc_compiler_get_output_variable_index( + struct vkd3d_dxbc_compiler *compiler, unsigned int register_idx) +{ + if (register_idx == ~0u) /* oDepth */ + return ARRAY_SIZE(compiler->private_output_variable) - 1; + assert(register_idx < ARRAY_SIZE(compiler->private_output_variable) - 1); + return register_idx; +} + +static unsigned int get_shader_output_swizzle(const struct vkd3d_dxbc_compiler *compiler, + unsigned int register_idx) +{ + const struct vkd3d_shader_spirv_target_info *info; + + if (!(info = compiler->spirv_target_info)) + return VKD3D_SHADER_NO_SWIZZLE; + if (register_idx >= info->output_swizzle_count) + return VKD3D_SHADER_NO_SWIZZLE; + return info->output_swizzles[register_idx]; +} + +static bool is_dual_source_blending(const struct vkd3d_dxbc_compiler *compiler) +{ + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + + return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; +} + +static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signature_element *e, + uint32_t *mask) +{ + if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) + { + FIXME("Invalid semantic index %u for clip/cull distance.\n", e->semantic_index); + return; + } + + *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); +} + +static uint32_t calculate_sysval_array_mask(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_signature *signature, enum vkd3d_shader_input_sysval_semantic 
sysval) +{ + const struct vkd3d_shader_signature_element *e; + const struct vkd3d_spirv_builtin *sig_builtin; + const struct vkd3d_spirv_builtin *builtin; + uint32_t signature_idx, mask = 0; + + if (!(builtin = get_spirv_builtin_for_sysval(compiler, sysval))) + { + FIXME("Unhandled sysval %#x.\n", sysval); + return 0; + } + + for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) + { + e = &signature->elements[signature_idx]; + + sig_builtin = get_spirv_builtin_for_sysval(compiler, + vkd3d_siv_from_sysval_indexed(e->sysval_semantic, e->semantic_index)); + + if (sig_builtin && sig_builtin->spirv_builtin == builtin->spirv_builtin) + mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * sig_builtin->member_idx); + } + + return mask; +} + +/* Emits arrayed SPIR-V built-in variables. */ +static void vkd3d_dxbc_compiler_emit_shader_signature_outputs(struct vkd3d_dxbc_compiler *compiler) +{ + const struct vkd3d_shader_signature *output_signature = compiler->output_signature; + uint32_t clip_distance_mask = 0, clip_distance_id = 0; + uint32_t cull_distance_mask = 0, cull_distance_id = 0; + const struct vkd3d_spirv_builtin *builtin; + unsigned int i, count; + + for (i = 0; i < output_signature->element_count; ++i) + { + const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; + + switch (e->sysval_semantic) + { + case VKD3D_SHADER_SV_CLIP_DISTANCE: + calculate_clip_or_cull_distance_mask(e, &clip_distance_mask); + break; + + case VKD3D_SHADER_SV_CULL_DISTANCE: + calculate_clip_or_cull_distance_mask(e, &cull_distance_mask); + break; + + default: + break; + } + } + + if (clip_distance_mask) + { + count = vkd3d_popcount(clip_distance_mask); + builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SIV_CLIP_DISTANCE); + clip_distance_id = vkd3d_dxbc_compiler_emit_builtin_variable(compiler, + builtin, SpvStorageClassOutput, count); + } + + if (cull_distance_mask) + { + count = vkd3d_popcount(cull_distance_mask); + 
builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SIV_CULL_DISTANCE); + cull_distance_id = vkd3d_dxbc_compiler_emit_builtin_variable(compiler, + builtin, SpvStorageClassOutput, count); + } + + for (i = 0; i < output_signature->element_count; ++i) + { + const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; + + switch (e->sysval_semantic) + { + case VKD3D_SHADER_SV_CLIP_DISTANCE: + compiler->output_info[i].id = clip_distance_id; + compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; + compiler->output_info[i].array_element_mask = clip_distance_mask; + break; + + case VKD3D_SHADER_SV_CULL_DISTANCE: + compiler->output_info[i].id = cull_distance_id; + compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; + compiler->output_info[i].array_element_mask = cull_distance_mask; + break; + + default: + break; + } + } +} + +static void vkd3d_dxbc_compiler_emit_output_register(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_dst_param *dst) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_register *reg = &dst->reg; + const struct vkd3d_spirv_builtin *builtin; + struct vkd3d_symbol reg_symbol; + uint32_t write_mask; + uint32_t output_id; + + assert(!reg->idx[0].rel_addr); + assert(!reg->idx[1].rel_addr); + assert(reg->idx[1].offset == ~0u); + + if (!(builtin = get_spirv_builtin_for_register(reg->type))) + { + FIXME("Unhandled register %#x.\n", reg->type); + return; + } + + output_id = vkd3d_dxbc_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); + + vkd3d_symbol_make_register(®_symbol, reg); + write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); + vkd3d_symbol_set_register_info(®_symbol, output_id, + SpvStorageClassOutput, builtin->component_type, write_mask); + reg_symbol.info.reg.dcl_mask = write_mask; + reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; + 
vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); + vkd3d_dxbc_compiler_emit_register_execution_mode(compiler, reg); + vkd3d_dxbc_compiler_emit_register_debug_name(builder, output_id, reg); +} + +static uint32_t vkd3d_dxbc_compiler_emit_shader_phase_builtin_variable(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_phase *phase, const struct vkd3d_spirv_builtin *builtin) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t *variable_id, id; + + variable_id = NULL; + + if (builtin->spirv_builtin == SpvBuiltInTessLevelOuter) + variable_id = &compiler->hs.tess_level_outer_id; + else if (builtin->spirv_builtin == SpvBuiltInTessLevelInner) + variable_id = &compiler->hs.tess_level_inner_id; + + if (variable_id && *variable_id) + return *variable_id; + + id = vkd3d_dxbc_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); + if (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE) + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); + + if (variable_id) + *variable_id = id; + return id; +} + +static void vkd3d_dxbc_compiler_emit_output(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_input_sysval_semantic sysval) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_signature_element *signature_element; + const struct vkd3d_shader_signature *shader_signature; + const struct vkd3d_shader_register *reg = &dst->reg; + unsigned int component_idx, output_component_count; + enum vkd3d_shader_component_type component_type; + const struct vkd3d_spirv_builtin *builtin; + const struct vkd3d_shader_phase *phase; + struct vkd3d_symbol *symbol = NULL; + bool use_private_variable = false; + struct vkd3d_symbol reg_symbol; + SpvStorageClass storage_class; + struct rb_entry *entry = NULL; + unsigned int signature_idx; + unsigned int write_mask; + unsigned int array_size; + bool 
is_patch_constant; + uint32_t id, var_id; + + phase = vkd3d_dxbc_compiler_get_current_shader_phase(compiler); + is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); + + shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; + + array_size = is_control_point_phase(phase) ? compiler->output_control_point_count : 0; + + if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, + &signature_idx, reg->idx[0].offset, dst->write_mask))) + { + FIXME("No signature element for shader output, ignoring shader output.\n"); + return; + } + + builtin = vkd3d_get_spirv_builtin(compiler, dst->reg.type, sysval); + + write_mask = signature_element->mask; + + component_idx = vkd3d_write_mask_get_component_idx(dst->write_mask); + output_component_count = vkd3d_write_mask_component_count(signature_element->mask); + if (builtin) + { + component_type = builtin->component_type; + if (!builtin->spirv_array_size) + output_component_count = builtin->component_count; + component_idx = 0; + } + else + { + component_type = signature_element->component_type; + } + + storage_class = SpvStorageClassOutput; + + if (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE + || needs_private_io_variable(shader_signature, signature_element->register_index, + builtin, &output_component_count, &write_mask) + || is_patch_constant) + use_private_variable = true; + else + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + + vkd3d_symbol_make_register(®_symbol, reg); + + if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) + { + symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + id = symbol->id; + } + + if (!symbol || ~symbol->info.reg.dcl_mask & write_mask) + { + if (compiler->output_info[signature_idx].id) + { + id = compiler->output_info[signature_idx].id; + if 
(compiler->output_info[signature_idx].array_element_mask) + use_private_variable = true; + } + else if (builtin) + { + if (phase) + id = vkd3d_dxbc_compiler_emit_shader_phase_builtin_variable(compiler, phase, builtin); + else + id = vkd3d_dxbc_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); + + if (builtin->spirv_array_size) + compiler->output_info[signature_idx].array_element_mask = + calculate_sysval_array_mask(compiler, shader_signature, sysval); + + vkd3d_dxbc_compiler_emit_register_execution_mode(compiler, &dst->reg); + } + else + { + unsigned int location = reg->idx[0].offset; + + if (is_patch_constant) + location += compiler->output_signature->element_count; + + id = vkd3d_dxbc_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, component_type, output_component_count, array_size); + vkd3d_spirv_add_iface_variable(builder, id); + + if (is_dual_source_blending(compiler) && reg->idx[0].offset < 2) + { + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, reg->idx[0].offset); + } + else + { + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); + } + + if (component_idx) + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); + } + + if (is_patch_constant) + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); + + vkd3d_dxbc_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element); + + compiler->output_info[signature_idx].id = id; + compiler->output_info[signature_idx].component_type = component_type; + } + + if (!symbol) + { + var_id = id; + if (use_private_variable) + storage_class = SpvStorageClassPrivate; + if (is_patch_constant) + var_id = compiler->hs.patch_constants_id; + else if (use_private_variable) + var_id = vkd3d_dxbc_compiler_emit_variable(compiler, &builder->global_stream, + storage_class, 
VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_variable ? VKD3DSP_WRITEMASK_ALL : write_mask); + reg_symbol.info.reg.is_aggregate = use_private_variable ? is_patch_constant : array_size; + if (!use_private_variable && is_control_point_phase(phase)) + { + reg_symbol.info.reg.member_idx = vkd3d_dxbc_compiler_get_invocation_id(compiler); + reg_symbol.info.reg.is_dynamically_indexed = true; + } + else if (is_patch_constant) + { + reg_symbol.info.reg.member_idx = reg->idx[0].offset; + } + reg_symbol.info.reg.dcl_mask = write_mask; + + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); + + if (!is_patch_constant) + vkd3d_dxbc_compiler_emit_register_debug_name(builder, var_id, reg); + } + else + { + symbol->info.reg.dcl_mask |= write_mask; + var_id = symbol->id; + } + + if (use_private_variable) + { + unsigned int idx = vkd3d_dxbc_compiler_get_output_variable_index(compiler, reg->idx[0].offset); + compiler->private_output_variable[idx] = var_id; + compiler->private_output_variable_write_mask[idx] |= dst->write_mask; + if (is_patch_constant) + compiler->private_output_variable_array_idx[idx] = vkd3d_dxbc_compiler_get_constant_uint( + compiler, reg->idx[0].offset); + if (!compiler->epilogue_function_id) + compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); + } +} + +static uint32_t vkd3d_dxbc_compiler_get_output_array_index(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_signature_element *e) +{ + enum vkd3d_shader_input_sysval_semantic sysval; + const struct vkd3d_spirv_builtin *builtin; + + sysval = vkd3d_siv_from_sysval_indexed(e->sysval_semantic, e->semantic_index); + builtin = get_spirv_builtin_for_sysval(compiler, sysval); + + switch (sysval) + { + case VKD3D_SIV_LINE_DETAIL_TESS_FACTOR: + case VKD3D_SIV_LINE_DENSITY_TESS_FACTOR: + return builtin->member_idx; + default: + return 
e->semantic_index; + } +} + +static void vkd3d_dxbc_compiler_emit_store_shader_output(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_signature *signature, const struct vkd3d_shader_signature_element *output, + const struct vkd3d_shader_output_info *output_info, + uint32_t output_index_id, uint32_t val_id, unsigned int write_mask) +{ + unsigned int dst_write_mask, use_mask, uninit_mask, swizzle, mask; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id; + const struct vkd3d_shader_signature_element *element; + unsigned int i, index, array_idx; + uint32_t output_id; + + dst_write_mask = output->mask; + use_mask = output->used_mask; + if (!output->sysval_semantic) + { + for (i = 0; i < signature->element_count; ++i) + { + element = &signature->elements[i]; + if (element->register_index != output->register_index) + continue; + if (element->sysval_semantic) + continue; + dst_write_mask |= element->mask; + use_mask |= element->used_mask; + } + } + write_mask &= dst_write_mask; + + if (!write_mask) + return; + + if (output_info->component_type != VKD3D_SHADER_COMPONENT_FLOAT) + { + type_id = vkd3d_spirv_get_type_id(builder, output_info->component_type, VKD3D_VEC4_SIZE); + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + } + + swizzle = get_shader_output_swizzle(compiler, output->register_index); + uninit_mask = dst_write_mask & ~use_mask; + if (uninit_mask) + { + /* Set values to 0 for not initialized shader output components. 
*/ + write_mask |= uninit_mask; + zero_id = vkd3d_dxbc_compiler_get_constant_vector(compiler, + output_info->component_type, VKD3D_VEC4_SIZE, 0); + val_id = vkd3d_dxbc_compiler_emit_vector_shuffle(compiler, + zero_id, val_id, swizzle, uninit_mask, output_info->component_type, + vkd3d_write_mask_component_count(write_mask)); + } + else + { + val_id = vkd3d_dxbc_compiler_emit_swizzle(compiler, + val_id, VKD3DSP_WRITEMASK_ALL, output_info->component_type, swizzle, write_mask); + } + + output_id = output_info->id; + if (output_index_id) + { + type_id = vkd3d_spirv_get_type_id(builder, + output_info->component_type, vkd3d_write_mask_component_count(dst_write_mask)); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); + output_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, output_id, output_index_id); + } + + if (!output_info->array_element_mask) + { + vkd3d_dxbc_compiler_emit_store(compiler, + output_id, dst_write_mask, output_info->component_type, SpvStorageClassOutput, write_mask, val_id); + return; + } + + type_id = vkd3d_spirv_get_type_id(builder, output_info->component_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); + mask = output_info->array_element_mask; + array_idx = vkd3d_dxbc_compiler_get_output_array_index(compiler, output); + mask &= (1u << (array_idx * VKD3D_VEC4_SIZE)) - 1; + for (i = 0, index = vkd3d_popcount(mask); i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask & (VKD3DSP_WRITEMASK_0 << i))) + continue; + + chain_id = vkd3d_spirv_build_op_access_chain1(builder, + ptr_type_id, output_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, index)); + object_id = vkd3d_dxbc_compiler_emit_swizzle(compiler, val_id, write_mask, + output_info->component_type, VKD3D_SHADER_NO_SWIZZLE, VKD3DSP_WRITEMASK_0 << i); + vkd3d_dxbc_compiler_emit_store(compiler, chain_id, VKD3DSP_WRITEMASK_0, + output_info->component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_0 << 
i, object_id); + ++index; + } +} + +static void vkd3d_dxbc_compiler_emit_shader_epilogue_function(struct vkd3d_dxbc_compiler *compiler) +{ + uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0}; + uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_signature *signature; + const struct vkd3d_shader_phase *phase; + uint32_t output_index_id = 0; + bool is_patch_constant; + unsigned int i, count; + DWORD variable_idx; + + STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_id)); + STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_type_id)); + STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_array_idx)); + STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_write_mask)); + + phase = vkd3d_dxbc_compiler_get_current_shader_phase(compiler); + is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); + + signature = is_patch_constant ? 
compiler->patch_constant_signature : compiler->output_signature; + + function_id = compiler->epilogue_function_id; + + void_id = vkd3d_spirv_get_op_type_void(builder); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); + for (i = 0, count = 0; i < ARRAY_SIZE(compiler->private_output_variable); ++i) + { + if (compiler->private_output_variable[i]) + param_type_id[count++] = ptr_type_id; + } + function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, param_type_id, count); + + vkd3d_spirv_build_op_function(builder, void_id, function_id, + SpvFunctionControlMaskNone, function_type_id); + + for (i = 0; i < ARRAY_SIZE(compiler->private_output_variable); ++i) + { + if (compiler->private_output_variable[i]) + param_id[i] = vkd3d_spirv_build_op_function_parameter(builder, ptr_type_id); + } + + vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); + + for (i = 0; i < ARRAY_SIZE(compiler->private_output_variable); ++i) + { + if (compiler->private_output_variable[i]) + param_id[i] = vkd3d_spirv_build_op_load(builder, type_id, param_id[i], SpvMemoryAccessMaskNone); + } + + if (is_control_point_phase(phase)) + output_index_id = vkd3d_dxbc_compiler_emit_load_invocation_id(compiler); + + for (i = 0; i < signature->element_count; ++i) + { + if (!compiler->output_info[i].id) + continue; + + variable_idx = vkd3d_dxbc_compiler_get_output_variable_index(compiler, + signature->elements[i].register_index); + if (!param_id[variable_idx]) + continue; + + vkd3d_dxbc_compiler_emit_store_shader_output(compiler, signature, + &signature->elements[i], &compiler->output_info[i], output_index_id, + param_id[variable_idx], compiler->private_output_variable_write_mask[variable_idx]); + } + + vkd3d_spirv_build_op_return(&compiler->spirv_builder); + vkd3d_spirv_build_op_function_end(builder); + + memset(compiler->private_output_variable, 0, 
sizeof(compiler->private_output_variable)); + memset(compiler->private_output_variable_array_idx, 0, sizeof(compiler->private_output_variable_array_idx)); + memset(compiler->private_output_variable_write_mask, 0, sizeof(compiler->private_output_variable_write_mask)); + compiler->epilogue_function_id = 0; +} + +static void vkd3d_dxbc_compiler_emit_hull_shader_builtins(struct vkd3d_dxbc_compiler *compiler) +{ + struct vkd3d_shader_dst_param dst; + + memset(&dst, 0, sizeof(dst)); + dst.reg.type = VKD3DSPR_OUTPOINTID; + dst.reg.idx[0].offset = ~0u; + dst.reg.idx[1].offset = ~0u; + dst.write_mask = VKD3DSP_WRITEMASK_0; + vkd3d_dxbc_compiler_emit_input_register(compiler, &dst); +} + +static void vkd3d_dxbc_compiler_emit_hull_shader_patch_constants(struct vkd3d_dxbc_compiler *compiler) +{ + const struct vkd3d_shader_signature *signature = compiler->patch_constant_signature; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t register_count = 0; + unsigned int signature_idx; + + for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) + register_count = max(register_count, signature->elements[signature_idx].register_index + 1); + + if (!register_count) + return; + + compiler->hs.patch_constants_id = vkd3d_dxbc_compiler_emit_array_variable(compiler, &builder->global_stream, + SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, register_count); + vkd3d_spirv_build_op_name(builder, compiler->hs.patch_constants_id, "opc"); +} + +static void vkd3d_dxbc_compiler_emit_initial_declarations(struct vkd3d_dxbc_compiler *compiler) +{ + const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + switch (compiler->shader_type) + { + case VKD3D_SHADER_TYPE_VERTEX: + vkd3d_spirv_set_execution_model(builder, SpvExecutionModelVertex); + break; + case VKD3D_SHADER_TYPE_HULL: + vkd3d_spirv_set_execution_model(builder, 
SpvExecutionModelTessellationControl); + vkd3d_dxbc_compiler_emit_hull_shader_builtins(compiler); + vkd3d_dxbc_compiler_emit_hull_shader_patch_constants(compiler); + break; + case VKD3D_SHADER_TYPE_DOMAIN: + vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); + break; + case VKD3D_SHADER_TYPE_GEOMETRY: + vkd3d_spirv_set_execution_model(builder, SpvExecutionModelGeometry); + builder->invocation_count = 1; + break; + case VKD3D_SHADER_TYPE_PIXEL: + vkd3d_spirv_set_execution_model(builder, SpvExecutionModelFragment); + vkd3d_dxbc_compiler_emit_execution_mode(compiler, SpvExecutionModeOriginUpperLeft, NULL, 0); + break; + case VKD3D_SHADER_TYPE_COMPUTE: + vkd3d_spirv_set_execution_model(builder, SpvExecutionModelGLCompute); + break; + default: + ERR("Invalid shader type %#x.\n", compiler->shader_type); + } + + if (xfb_info && xfb_info->element_count) + { + vkd3d_spirv_enable_capability(builder, SpvCapabilityTransformFeedback); + vkd3d_dxbc_compiler_emit_execution_mode(compiler, SpvExecutionModeXfb, NULL, 0); + } + + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + { + vkd3d_spirv_builder_begin_main_function(builder); + + vkd3d_dxbc_compiler_emit_shader_signature_outputs(compiler); + } +} + +static size_t vkd3d_dxbc_compiler_get_current_function_location(struct vkd3d_dxbc_compiler *compiler) +{ + const struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_phase *phase; + + if ((phase = vkd3d_dxbc_compiler_get_current_shader_phase(compiler))) + return phase->function_location; + + return builder->main_function_location; +} + +static void vkd3d_dxbc_compiler_emit_dcl_global_flags(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + unsigned int flags = instruction->flags; + + if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) + { + vkd3d_dxbc_compiler_emit_execution_mode(compiler, SpvExecutionModeEarlyFragmentTests, NULL, 0); + flags &= 
~VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; + } + + if (flags & (VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS | VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS)) + { + vkd3d_spirv_enable_capability(&compiler->spirv_builder, SpvCapabilityFloat64); + flags &= ~(VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS | VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS); + } + + if (flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)) + FIXME("Unhandled global flags %#x.\n", flags); + else + WARN("Unhandled global flags %#x.\n", flags); +} + +static void vkd3d_dxbc_compiler_emit_dcl_temps(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + size_t function_location; + unsigned int i; + uint32_t id; + + function_location = vkd3d_dxbc_compiler_get_current_function_location(compiler); + vkd3d_spirv_begin_function_stream_insertion(builder, function_location); + + assert(!compiler->temp_count); + compiler->temp_count = instruction->declaration.count; + for (i = 0; i < compiler->temp_count; ++i) + { + id = vkd3d_dxbc_compiler_emit_variable(compiler, &builder->function_stream, + SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + if (!i) + compiler->temp_id = id; + assert(id == compiler->temp_id + i); + + vkd3d_spirv_build_op_name(builder, id, "r%u", i); + } + + vkd3d_spirv_end_function_stream_insertion(builder); +} + +static void vkd3d_dxbc_compiler_emit_dcl_indexable_temp(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_indexable_temp *temp = &instruction->declaration.indexable_temp; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_shader_register reg; + struct vkd3d_symbol reg_symbol; + size_t function_location; + uint32_t id; + + if (temp->component_count != 4) + FIXME("Unhandled component count %u.\n", temp->component_count); + + memset(®, 0, 
sizeof(reg)); + reg.type = VKD3DSPR_IDXTEMP; + reg.idx[0].offset = temp->register_idx; + reg.idx[1].offset = ~0u; + + function_location = vkd3d_dxbc_compiler_get_current_function_location(compiler); + vkd3d_spirv_begin_function_stream_insertion(builder, function_location); + + id = vkd3d_dxbc_compiler_emit_array_variable(compiler, &builder->function_stream, + SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, temp->register_size); + + vkd3d_dxbc_compiler_emit_register_debug_name(builder, id, ®); + + vkd3d_spirv_end_function_stream_insertion(builder); + + vkd3d_symbol_make_register(®_symbol, ®); + vkd3d_symbol_set_register_info(®_symbol, id, + SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); +} + +static void vkd3d_dxbc_compiler_emit_push_constant_buffers(struct vkd3d_dxbc_compiler *compiler) +{ + unsigned int i, j, count, reg_idx, descriptor_offsets_member_idx = 0; + const SpvStorageClass storage_class = SpvStorageClassPushConstant; + uint32_t vec4_id, length_id, struct_id, pointer_type_id, var_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_symbol reg_symbol; + uint32_t *member_ids; + + count = !!compiler->offset_info.descriptor_table_count; + for (i = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) + { + const struct vkd3d_push_constant_buffer_binding *cb = &compiler->push_constants[i]; + + if (cb->reg.type) + ++count; + } + if (!count) + return; + + if (!(member_ids = vkd3d_calloc(count, sizeof(*member_ids)))) + return; + + vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + + for (i = 0, j = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) + { + const struct vkd3d_push_constant_buffer_binding *cb = &compiler->push_constants[i]; + if (!cb->reg.type) + continue; + + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, cb->size); + 
member_ids[j] = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); + vkd3d_spirv_build_op_decorate1(builder, member_ids[j], SpvDecorationArrayStride, 16); + + ++j; + } + + if (compiler->offset_info.descriptor_table_count) + { + uint32_t type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, compiler->offset_info.descriptor_table_count); + member_ids[j] = vkd3d_spirv_build_op_type_array(builder, type_id, length_id); + vkd3d_spirv_build_op_decorate1(builder, member_ids[j], SpvDecorationArrayStride, 4); + descriptor_offsets_member_idx = j; + compiler->descriptor_offsets_member_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, j); + assert(j == count - 1); + } + + struct_id = vkd3d_spirv_build_op_type_struct(builder, member_ids, count); + vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); + vkd3d_spirv_build_op_name(builder, struct_id, "push_cb"); + vkd3d_free(member_ids); + + pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, struct_id); + var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, + pointer_type_id, storage_class, 0); + compiler->push_constants_var_id = var_id; + + for (i = 0, j = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) + { + const struct vkd3d_push_constant_buffer_binding *cb = &compiler->push_constants[i]; + if (!cb->reg.type) + continue; + + reg_idx = cb->reg.idx[0].offset; + vkd3d_spirv_build_op_member_decorate1(builder, struct_id, j, + SpvDecorationOffset, cb->pc.offset); + vkd3d_spirv_build_op_member_name(builder, struct_id, j, "cb%u", reg_idx); + + vkd3d_symbol_make_register(®_symbol, &cb->reg); + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + reg_symbol.info.reg.member_idx = j; + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); + + ++j; + } + if 
(compiler->offset_info.descriptor_table_count) + { + vkd3d_spirv_build_op_member_decorate1(builder, struct_id, descriptor_offsets_member_idx, + SpvDecorationOffset, compiler->offset_info.descriptor_table_offset); + } +} + +struct vkd3d_descriptor_variable_info +{ + const struct vkd3d_symbol *array_symbol; + unsigned int binding_base_idx; +}; + +static uint32_t vkd3d_dxbc_compiler_build_descriptor_variable(struct vkd3d_dxbc_compiler *compiler, + SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, + const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, + bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_descriptor_binding_address binding_address; + struct vkd3d_shader_descriptor_binding binding; + uint32_t array_type_id, ptr_type_id, var_id; + struct vkd3d_symbol symbol; + struct rb_entry *entry; + + binding = vkd3d_dxbc_compiler_get_descriptor_binding(compiler, reg, range, + resource_type, is_uav_counter, &binding_address); + var_info->binding_base_idx = binding_address.binding_base_idx; + + if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u + && binding_address.push_constant_index == ~0u) + { + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); + var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, + ptr_type_id, storage_class, 0); + + vkd3d_dxbc_compiler_emit_descriptor_binding(compiler, var_id, &binding); + vkd3d_dxbc_compiler_emit_register_debug_name(builder, var_id, reg); + + var_info->array_symbol = NULL; + return var_id; + } + + vkd3d_spirv_enable_capability(builder, SpvCapabilityRuntimeDescriptorArrayEXT); + array_type_id = vkd3d_spirv_get_op_type_runtime_array(builder, type_id); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); + + /* Declare one 
array variable per Vulkan binding, and use it for + * all array declarations which map to it. */ + symbol.type = VKD3D_SYMBOL_DESCRIPTOR_ARRAY; + memset(&symbol.key, 0, sizeof(symbol.key)); + symbol.key.descriptor_array.ptr_type_id = ptr_type_id; + symbol.key.descriptor_array.set = binding.set; + symbol.key.descriptor_array.binding = binding.binding; + symbol.key.descriptor_array.push_constant_index = binding_address.push_constant_index; + if ((entry = rb_get(&compiler->symbol_table, &symbol))) + { + var_info->array_symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + return var_info->array_symbol->id; + } + + var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, + ptr_type_id, storage_class, 0); + vkd3d_dxbc_compiler_emit_descriptor_binding(compiler, var_id, &binding); + vkd3d_dxbc_compiler_emit_register_debug_name(builder, var_id, reg); + + symbol.id = var_id; + symbol.descriptor_array = NULL; + symbol.info.descriptor_array.storage_class = storage_class; + symbol.info.descriptor_array.contained_type_id = type_id; + var_info->array_symbol = vkd3d_dxbc_compiler_put_symbol(compiler, &symbol); + + return var_id; +} + +static void vkd3d_dxbc_compiler_emit_dcl_constant_buffer(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; + const SpvStorageClass storage_class = SpvStorageClassUniform; + const struct vkd3d_shader_register *reg = &cb->src.reg; + struct vkd3d_push_constant_buffer_binding *push_cb; + struct vkd3d_descriptor_variable_info var_info; + struct vkd3d_symbol reg_symbol; + + assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); + + if ((push_cb = vkd3d_dxbc_compiler_find_push_constant_buffer(compiler, cb))) + { + /* Push constant buffers are handled in + * 
vkd3d_dxbc_compiler_emit_push_constant_buffers(). + */ + unsigned int cb_size_in_bytes = cb->size * VKD3D_VEC4_SIZE * sizeof(uint32_t); + push_cb->reg = *reg; + push_cb->size = cb->size; + if (cb_size_in_bytes > push_cb->pc.size) + { + WARN("Constant buffer size %u exceeds push constant size %u.\n", + cb_size_in_bytes, push_cb->pc.size); + } + return; + } + + vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, cb->size); + array_type_id = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); + vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 16); + + struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); + vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); + vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); + vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", cb->size); + + var_id = vkd3d_dxbc_compiler_build_descriptor_variable(compiler, storage_class, struct_id, + reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); + + vkd3d_symbol_make_register(®_symbol, reg); + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + reg_symbol.descriptor_array = var_info.array_symbol; + reg_symbol.info.reg.binding_base_idx = var_info.binding_base_idx; + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); +} + +static void vkd3d_dxbc_compiler_emit_dcl_immediate_constant_buffer(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_immediate_constant_buffer *icb = instruction->declaration.icb; + uint32_t *elements, length_id, type_id, const_id, ptr_type_id, icb_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_shader_register reg; + struct vkd3d_symbol reg_symbol; + 
unsigned int i; + + if (!(elements = vkd3d_calloc(icb->vec4_count, sizeof(*elements)))) + return; + for (i = 0; i < icb->vec4_count; ++i) + elements[i] = vkd3d_dxbc_compiler_get_constant(compiler, + VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, &icb->data[4 * i]); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, icb->vec4_count); + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + const_id = vkd3d_spirv_build_op_constant_composite(builder, type_id, elements, icb->vec4_count); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); + icb_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, + ptr_type_id, SpvStorageClassPrivate, const_id); + vkd3d_spirv_build_op_name(builder, icb_id, "icb"); + vkd3d_free(elements); + + memset(®, 0, sizeof(reg)); + reg.type = VKD3DSPR_IMMCONSTBUFFER; + vkd3d_symbol_make_register(®_symbol, ®); + vkd3d_symbol_set_register_info(®_symbol, icb_id, SpvStorageClassPrivate, + VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); +} + +static void vkd3d_dxbc_compiler_emit_dcl_sampler(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; + const SpvStorageClass storage_class = SpvStorageClassUniformConstant; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_register *reg = &sampler->src.reg; + struct vkd3d_descriptor_variable_info var_info; + struct vkd3d_symbol reg_symbol; + uint32_t type_id, var_id; + + vkd3d_symbol_make_sampler(®_symbol, reg); + reg_symbol.info.sampler.range = sampler->range; + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); + + if (vkd3d_dxbc_compiler_has_combined_sampler(compiler, NULL, sampler)) + return; + + type_id = 
vkd3d_spirv_get_op_type_sampler(builder); + var_id = vkd3d_dxbc_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, + &sampler->range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); + + vkd3d_symbol_make_register(®_symbol, reg); + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + reg_symbol.descriptor_array = var_info.array_symbol; + reg_symbol.info.reg.binding_base_idx = var_info.binding_base_idx; + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); +} + +static const struct vkd3d_spirv_resource_type *vkd3d_dxbc_compiler_enable_resource_type( + struct vkd3d_dxbc_compiler *compiler, enum vkd3d_shader_resource_type resource_type, bool is_uav) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_spirv_resource_type *resource_type_info; + + if (!(resource_type_info = vkd3d_get_spirv_resource_type(resource_type))) + return NULL; + + if (resource_type_info->capability) + vkd3d_spirv_enable_capability(builder, resource_type_info->capability); + if (is_uav && resource_type_info->uav_capability) + vkd3d_spirv_enable_capability(builder, resource_type_info->uav_capability); + + return resource_type_info; +} + +static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_type data_type) +{ + /* The following formats are supported by Direct3D 11 hardware for UAV + * typed loads. A newer hardware may support more formats for UAV typed + * loads (see StorageImageReadWithoutFormat SPIR-V capability). 
+ */ + switch (data_type) + { + case VKD3D_SHADER_COMPONENT_FLOAT: + return SpvImageFormatR32f; + case VKD3D_SHADER_COMPONENT_INT: + return SpvImageFormatR32i; + case VKD3D_SHADER_COMPONENT_UINT: + return SpvImageFormatR32ui; + default: + FIXME("Unhandled type %#x.\n", data_type); + return SpvImageFormatUnknown; + } +} + +static const struct vkd3d_shader_descriptor_info *vkd3d_dxbc_compiler_get_descriptor_info( + struct vkd3d_dxbc_compiler *compiler, enum vkd3d_shader_descriptor_type type, + const struct vkd3d_shader_register_range *range) +{ + const struct vkd3d_shader_scan_descriptor_info *descriptor_info = compiler->scan_descriptor_info; + unsigned int register_last = (range->last == ~0u) ? range->first : range->last; + const struct vkd3d_shader_descriptor_info *d; + unsigned int i; + + for (i = 0; i < descriptor_info->descriptor_count; ++i) + { + d = &descriptor_info->descriptors[i]; + if (d->type == type && d->register_space == range->space && d->register_index <= range->first + && (d->count == ~0u || d->count > register_last - d->register_index)) + return d; + } + + return NULL; +} + +static uint32_t vkd3d_dxbc_compiler_get_image_type_id(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, + const struct vkd3d_spirv_resource_type *resource_type_info, enum vkd3d_shader_component_type data_type, + bool raw_structured, uint32_t depth) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_descriptor_info *d; + uint32_t sampled_type_id; + SpvImageFormat format; + + format = SpvImageFormatUnknown; + if (reg->type == VKD3DSPR_UAV) + { + d = vkd3d_dxbc_compiler_get_descriptor_info(compiler, + VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); + if (raw_structured || (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) + format = image_format_for_image_read(data_type); + } + + sampled_type_id = vkd3d_spirv_get_type_id(builder, data_type, 1); + return 
vkd3d_spirv_get_op_type_image(builder, sampled_type_id, resource_type_info->dim, + depth, resource_type_info->arrayed, resource_type_info->ms, + reg->type == VKD3DSPR_UAV ? 2 : 1, format); +} + +static void vkd3d_dxbc_compiler_emit_combined_sampler_declarations(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *resource, const struct vkd3d_shader_register_range *resource_range, + enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_component_type sampled_type, + unsigned int structure_stride, bool raw, const struct vkd3d_spirv_resource_type *resource_type_info) +{ + const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; + const SpvStorageClass storage_class = SpvStorageClassUniformConstant; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_combined_resource_sampler *current; + uint32_t image_type_id, type_id, ptr_type_id, var_id; + enum vkd3d_shader_binding_flag resource_type_flag; + const struct vkd3d_shader_descriptor_info *d; + struct vkd3d_symbol symbol; + unsigned int i; + bool depth; + + resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER + ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + + for (i = 0; i < shader_interface->combined_sampler_count; ++i) + { + struct vkd3d_shader_register_range sampler_range; + + current = &shader_interface->combined_samplers[i]; + + if (current->resource_space != resource_range->space || current->resource_index != resource_range->first) + continue; + + if (!(current->flags & resource_type_flag)) + continue; + + if (!vkd3d_dxbc_compiler_check_shader_visibility(compiler, current->shader_visibility)) + continue; + + if (current->binding.count != 1) + { + FIXME("Descriptor arrays are not supported.\n"); + vkd3d_dxbc_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_DESCRIPTOR_BINDING, + "Combined descriptor binding for resource %u, space %u, " + "and sampler %u, space %u has unsupported ‘count’ %u.", + resource_range->first, resource_range->space, current->sampler_index, + current->sampler_space, current->binding.count); + } + + sampler_range.space = current->sampler_space; + sampler_range.first = current->sampler_index; + sampler_range.last = current->sampler_index; + d = vkd3d_dxbc_compiler_get_descriptor_info(compiler, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler_range); + depth = current->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX + && (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE); + + image_type_id = vkd3d_dxbc_compiler_get_image_type_id(compiler, resource, resource_range, + resource_type_info, sampled_type, structure_stride || raw, depth); + type_id = vkd3d_spirv_get_op_type_sampled_image(builder, image_type_id); + + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); + var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, + ptr_type_id, storage_class, 0); + + vkd3d_dxbc_compiler_emit_descriptor_binding(compiler, var_id, ¤t->binding); + + if (current->sampler_index == VKD3D_SHADER_DUMMY_SAMPLER_INDEX) + vkd3d_spirv_build_op_name(builder, var_id, 
"t%u_%u_dummy_sampler", resource_range->space, + resource_range->first); + else + vkd3d_spirv_build_op_name(builder, var_id, "t%u_%u_s%u_%u", resource_range->space, resource_range->first, + current->sampler_space, current->sampler_index); + + vkd3d_symbol_make_combined_sampler(&symbol, resource, + current->sampler_index == VKD3D_SHADER_DUMMY_SAMPLER_INDEX ? 0 : current->sampler_space, + current->sampler_index); + symbol.id = var_id; + symbol.info.resource.range = *resource_range; + symbol.info.resource.sampled_type = sampled_type; + symbol.info.resource.type_id = image_type_id; + symbol.info.resource.resource_type_info = resource_type_info; + symbol.info.resource.structure_stride = structure_stride; + symbol.info.resource.raw = raw; + symbol.info.resource.uav_counter_id = 0; + symbol.info.resource.uav_counter_array = NULL; + symbol.info.resource.uav_counter_base_idx = 0; + vkd3d_dxbc_compiler_put_symbol(compiler, &symbol); + } +} + +static void vkd3d_dxbc_compiler_emit_resource_declaration(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, + enum vkd3d_data_type resource_data_type, unsigned int structure_stride, bool raw) +{ + struct vkd3d_descriptor_variable_info var_info, counter_var_info; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + SpvStorageClass storage_class = SpvStorageClassUniformConstant; + uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; + const struct vkd3d_shader_register *reg = &resource->reg.reg; + const struct vkd3d_spirv_resource_type *resource_type_info; + enum vkd3d_shader_component_type sampled_type; + struct vkd3d_symbol resource_symbol; + bool is_uav; + + is_uav = reg->type == VKD3DSPR_UAV; + if (!(resource_type_info = vkd3d_dxbc_compiler_enable_resource_type(compiler, + resource_type, is_uav))) + { + FIXME("Unrecognized resource type.\n"); + return; + } + + sampled_type = 
vkd3d_component_type_from_data_type(resource_data_type); + + if (vkd3d_dxbc_compiler_has_combined_sampler(compiler, resource, NULL)) + { + vkd3d_dxbc_compiler_emit_combined_sampler_declarations(compiler, reg, &resource->range, + resource_type, sampled_type, structure_stride, raw, resource_type_info); + return; + } + + if (compiler->ssbo_uavs && is_uav && resource_type == VKD3D_SHADER_RESOURCE_BUFFER) + { + uint32_t array_type_id, struct_id; + + type_id = vkd3d_spirv_get_type_id(builder, sampled_type, 1); + + array_type_id = vkd3d_spirv_get_op_type_runtime_array(builder, type_id); + vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 4); + + struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); + vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBufferBlock, NULL, 0); + vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); + + type_id = struct_id; + storage_class = SpvStorageClassUniform; + } + else + { + type_id = vkd3d_dxbc_compiler_get_image_type_id(compiler, reg, &resource->range, + resource_type_info, sampled_type, structure_stride || raw, 0); + } + + var_id = vkd3d_dxbc_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, + &resource->range, resource_type, false, &var_info); + + if (is_uav) + { + const struct vkd3d_shader_descriptor_info *d; + + d = vkd3d_dxbc_compiler_get_descriptor_info(compiler, + VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); + + if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) + vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); + + if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) + { + assert(structure_stride); /* counters are valid only for structured buffers */ + + counter_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + if (vkd3d_dxbc_compiler_is_opengl_target(compiler)) + { + vkd3d_spirv_enable_capability(builder, 
SpvCapabilityAtomicStorage); + storage_class = SpvStorageClassAtomicCounter; + type_id = counter_type_id; + } + else if (compiler->ssbo_uavs) + { + uint32_t length_id, array_type_id, struct_id; + + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, 1); + array_type_id = vkd3d_spirv_build_op_type_array(builder, counter_type_id, length_id); + vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 4); + + struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); + vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBufferBlock, NULL, 0); + vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); + + storage_class = SpvStorageClassUniform; + type_id = struct_id; + } + + counter_var_id = vkd3d_dxbc_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, + &resource->range, resource_type, true, &counter_var_info); + } + } + + vkd3d_symbol_make_resource(&resource_symbol, reg); + resource_symbol.id = var_id; + resource_symbol.descriptor_array = var_info.array_symbol; + resource_symbol.info.resource.range = resource->range; + resource_symbol.info.resource.sampled_type = sampled_type; + resource_symbol.info.resource.type_id = type_id; + resource_symbol.info.resource.resource_type_info = resource_type_info; + resource_symbol.info.resource.structure_stride = structure_stride; + resource_symbol.info.resource.raw = raw; + resource_symbol.info.resource.binding_base_idx = var_info.binding_base_idx; + resource_symbol.info.resource.uav_counter_id = counter_var_id; + resource_symbol.info.resource.uav_counter_array = counter_var_info.array_symbol; + resource_symbol.info.resource.uav_counter_base_idx = counter_var_info.binding_base_idx; + vkd3d_dxbc_compiler_put_symbol(compiler, &resource_symbol); +} + +static void vkd3d_dxbc_compiler_emit_dcl_resource(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct 
vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; + uint32_t flags = instruction->flags; + + /* We don't distinguish between APPEND and COUNTER UAVs. */ + flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; + if (flags) + FIXME("Unhandled UAV flags %#x.\n", flags); + + vkd3d_dxbc_compiler_emit_resource_declaration(compiler, &semantic->resource, + semantic->resource_type, semantic->resource_data_type[0], 0, false); +} + +static void vkd3d_dxbc_compiler_emit_dcl_resource_raw(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_raw_resource *resource = &instruction->declaration.raw_resource; + uint32_t flags = instruction->flags; + + /* We don't distinguish between APPEND and COUNTER UAVs. */ + flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; + if (flags) + FIXME("Unhandled UAV flags %#x.\n", flags); + + vkd3d_dxbc_compiler_emit_resource_declaration(compiler, &resource->resource, + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, 0, true); +} + +static void vkd3d_dxbc_compiler_emit_dcl_resource_structured(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_structured_resource *resource = &instruction->declaration.structured_resource; + unsigned int stride = resource->byte_stride; + uint32_t flags = instruction->flags; + + /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ + flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; + if (flags) + FIXME("Unhandled UAV flags %#x.\n", flags); + + vkd3d_dxbc_compiler_emit_resource_declaration(compiler, &resource->resource, + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, stride / 4, false); +} + +static void vkd3d_dxbc_compiler_emit_workgroup_memory(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) +{ + uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const SpvStorageClass storage_class = SpvStorageClassWorkgroup; + struct vkd3d_symbol reg_symbol; + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, size); + array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + + pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); + var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, + pointer_type_id, storage_class, 0); + + vkd3d_dxbc_compiler_emit_register_debug_name(builder, var_id, reg); + + vkd3d_symbol_make_register(®_symbol, reg); + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + VKD3D_SHADER_COMPONENT_UINT, VKD3DSP_WRITEMASK_0); + reg_symbol.info.reg.structure_stride = structure_stride; + vkd3d_dxbc_compiler_put_symbol(compiler, ®_symbol); +} + +static void vkd3d_dxbc_compiler_emit_dcl_tgsm_raw(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; + vkd3d_dxbc_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, + tgsm_raw->byte_count / 4, 0); +} + +static void vkd3d_dxbc_compiler_emit_dcl_tgsm_structured(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct 
vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; + unsigned int stride = tgsm_structured->byte_stride / 4; + vkd3d_dxbc_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, + tgsm_structured->structure_count * stride, stride); +} + +static void vkd3d_dxbc_compiler_emit_dcl_input(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; + const struct vkd3d_shader_phase *phase; + + if ((phase = vkd3d_dxbc_compiler_get_current_shader_phase(compiler))) + vkd3d_dxbc_compiler_emit_shader_phase_input(compiler, phase, dst); + else if (vkd3d_shader_register_is_input(&dst->reg) || dst->reg.type == VKD3DSPR_PATCHCONST) + vkd3d_dxbc_compiler_emit_input(compiler, dst, VKD3D_SIV_NONE, VKD3DSIM_NONE); + else + vkd3d_dxbc_compiler_emit_input_register(compiler, dst); + + if (dst->reg.type == VKD3DSPR_OUTCONTROLPOINT) + compiler->use_vocp = true; +} + +static void vkd3d_dxbc_compiler_emit_dcl_input_ps(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + vkd3d_dxbc_compiler_emit_input(compiler, &instruction->declaration.dst, VKD3D_SIV_NONE, instruction->flags); +} + +static void vkd3d_dxbc_compiler_emit_dcl_input_ps_sysval(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_register_semantic *semantic = &instruction->declaration.register_semantic; + + vkd3d_dxbc_compiler_emit_input(compiler, &semantic->reg, semantic->sysval_semantic, instruction->flags); +} + +static void vkd3d_dxbc_compiler_emit_dcl_input_sysval(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + vkd3d_dxbc_compiler_emit_input(compiler, &instruction->declaration.register_semantic.reg, + instruction->declaration.register_semantic.sysval_semantic, VKD3DSIM_NONE); +} + +static void 
vkd3d_dxbc_compiler_emit_dcl_output(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; + + if (vkd3d_shader_register_is_output(&dst->reg)) + vkd3d_dxbc_compiler_emit_output(compiler, dst, VKD3D_SIV_NONE); + else + vkd3d_dxbc_compiler_emit_output_register(compiler, dst); +} + +static void vkd3d_dxbc_compiler_emit_dcl_output_siv(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_shader_input_sysval_semantic sysval; + const struct vkd3d_shader_dst_param *dst; + + dst = &instruction->declaration.register_semantic.reg; + sysval = instruction->declaration.register_semantic.sysval_semantic; + + vkd3d_dxbc_compiler_emit_output(compiler, dst, sysval); +} + +static bool vkd3d_dxbc_compiler_check_index_range(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_index_range *range) +{ + const struct vkd3d_shader_register *reg = &range->dst.reg; + struct vkd3d_shader_register_info reg_info; + struct vkd3d_shader_register current_reg; + struct vkd3d_symbol reg_symbol; + unsigned int i; + uint32_t id; + + current_reg = *reg; + vkd3d_symbol_make_register(®_symbol, ¤t_reg); + if (!vkd3d_dxbc_compiler_get_register_info(compiler, ¤t_reg, ®_info)) + { + ERR("Failed to get register info.\n"); + return false; + } + + /* FIXME: We should check if it's an array. 
*/ + if (!reg_info.is_aggregate) + { + FIXME("Unhandled register %#x.\n", reg->type); + return false; + } + id = reg_info.id; + + for (i = reg->idx[0].offset; i < reg->idx[0].offset + range->register_count; ++i) + { + current_reg.idx[0].offset = i; + vkd3d_symbol_make_register(®_symbol, ¤t_reg); + + if (range->dst.write_mask != reg_info.write_mask + || vkd3d_write_mask_component_count(reg_info.write_mask) != 1) + { + FIXME("Unhandled index range write mask %#x (%#x).\n", + range->dst.write_mask, reg_info.write_mask); + return false; + } + + if (reg_info.id != id) + { + FIXME("Unhandled index range %#x, %u.\n", reg->type, i); + return false; + } + } + + return true; +} + +static void vkd3d_dxbc_compiler_emit_dcl_index_range(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range; + + if (!vkd3d_dxbc_compiler_check_index_range(compiler, range)) + FIXME("Ignoring dcl_index_range %#x %u.\n", range->dst.reg.type, range->register_count); +} + +static void vkd3d_dxbc_compiler_emit_dcl_stream(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + unsigned int stream_idx = instruction->src[0].reg.idx[0].offset; + + if (stream_idx) + FIXME("Multiple streams are not supported yet.\n"); +} + +static void vkd3d_dxbc_compiler_emit_output_vertex_count(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + vkd3d_dxbc_compiler_emit_execution_mode1(compiler, + SpvExecutionModeOutputVertices, instruction->declaration.count); +} + +static void vkd3d_dxbc_compiler_emit_dcl_input_primitive(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_primitive_type primitive_type = instruction->declaration.primitive_type.type; + SpvExecutionMode mode; + + switch (primitive_type) + { + case VKD3D_PT_POINTLIST: + mode = 
SpvExecutionModeInputPoints; + break; + case VKD3D_PT_LINELIST: + mode = SpvExecutionModeInputLines; + break; + case VKD3D_PT_LINELIST_ADJ: + mode = SpvExecutionModeInputLinesAdjacency; + break; + case VKD3D_PT_TRIANGLELIST: + mode = SpvExecutionModeTriangles; + break; + case VKD3D_PT_TRIANGLELIST_ADJ: + mode = SpvExecutionModeInputTrianglesAdjacency; + break; + default: + FIXME("Unhandled primitive type %#x.\n", primitive_type); + return; + } + + vkd3d_dxbc_compiler_emit_execution_mode(compiler, mode, NULL, 0); +} + +static void vkd3d_dxbc_compiler_emit_point_size(struct vkd3d_dxbc_compiler *compiler) +{ + static const struct vkd3d_spirv_builtin point_size = {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize}; + + /* Set the point size. Point sprites are not supported in d3d10+, but + * point primitives can still be used with e.g. stream output. Vulkan + * requires the point size to always be explicitly defined when outputting + * points. */ + vkd3d_spirv_build_op_store(&compiler->spirv_builder, + vkd3d_dxbc_compiler_emit_builtin_variable(compiler, &point_size, SpvStorageClassOutput, 0), + vkd3d_dxbc_compiler_get_constant_float(compiler, 1.0f), SpvMemoryAccessMaskNone); +} + +static void vkd3d_dxbc_compiler_emit_dcl_output_topology(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_primitive_type primitive_type = instruction->declaration.primitive_type.type; + SpvExecutionMode mode; + + switch (primitive_type) + { + case VKD3D_PT_POINTLIST: + mode = SpvExecutionModeOutputPoints; + vkd3d_dxbc_compiler_emit_point_size(compiler); + break; + case VKD3D_PT_LINESTRIP: + mode = SpvExecutionModeOutputLineStrip; + break; + case VKD3D_PT_TRIANGLESTRIP: + mode = SpvExecutionModeOutputTriangleStrip; + break; + default: + ERR("Unexpected primitive type %#x.\n", primitive_type); + return; + } + + vkd3d_dxbc_compiler_emit_execution_mode(compiler, mode, NULL, 0); +} + +static void 
vkd3d_dxbc_compiler_emit_dcl_gs_instances(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + compiler->spirv_builder.invocation_count = instruction->declaration.count; +} + +static void vkd3d_dxbc_compiler_emit_dcl_tessellator_domain(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain; + SpvExecutionMode mode; + + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && vkd3d_dxbc_compiler_is_opengl_target(compiler)) + return; + + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: + mode = SpvExecutionModeIsolines; + break; + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + mode = SpvExecutionModeTriangles; + break; + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + mode = SpvExecutionModeQuads; + break; + default: + FIXME("Invalid tessellator domain %#x.\n", domain); + return; + } + + vkd3d_dxbc_compiler_emit_execution_mode(compiler, mode, NULL, 0); +} + +static void vkd3d_dxbc_compiler_emit_tessellator_output_primitive(struct vkd3d_dxbc_compiler *compiler, + enum vkd3d_shader_tessellator_output_primitive primitive) +{ + SpvExecutionMode mode; + + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && vkd3d_dxbc_compiler_is_opengl_target(compiler)) + return; + + switch (primitive) + { + case VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT: + mode = SpvExecutionModePointMode; + break; + case VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE: + return; + case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW: + mode = SpvExecutionModeVertexOrderCw; + break; + case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW: + mode = SpvExecutionModeVertexOrderCcw; + break; + default: + FIXME("Invalid tessellator output primitive %#x.\n", primitive); + return; + } + + vkd3d_dxbc_compiler_emit_execution_mode(compiler, mode, NULL, 0); +} + +static void vkd3d_dxbc_compiler_emit_tessellator_partitioning(struct vkd3d_dxbc_compiler *compiler, + enum 
vkd3d_shader_tessellator_partitioning partitioning) +{ + SpvExecutionMode mode; + + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && vkd3d_dxbc_compiler_is_opengl_target(compiler)) + return; + + switch (partitioning) + { + case VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER: + case VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2: + mode = SpvExecutionModeSpacingEqual; + break; + case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: + mode = SpvExecutionModeSpacingFractionalOdd; + break; + case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: + mode = SpvExecutionModeSpacingFractionalEven; + break; + default: + FIXME("Invalid tessellator partitioning %#x.\n", partitioning); + return; + } + + vkd3d_dxbc_compiler_emit_execution_mode(compiler, mode, NULL, 0); +} + +static void vkd3d_dxbc_compiler_emit_dcl_thread_group(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_thread_group_size *group_size = &instruction->declaration.thread_group_size; + const uint32_t local_size[] = {group_size->x, group_size->y, group_size->z}; + + vkd3d_dxbc_compiler_emit_execution_mode(compiler, + SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); +} + +static void vkd3d_dxbc_compiler_leave_shader_phase(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_phase *phase) +{ + const struct vkd3d_shader_signature *signature = compiler->output_signature; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_symbol reg_symbol, *symbol; + struct vkd3d_shader_register reg; + struct rb_entry *entry; + unsigned int i; + + vkd3d_spirv_build_op_function_end(builder); + + compiler->temp_id = 0; + compiler->temp_count = 0; + + /* + * vocp inputs in fork and join shader phases are outputs of the control + * point phase. Reinsert symbols for vocp registers while leaving the + * control point phase. 
+ */ + if (is_control_point_phase(phase)) + { + if (compiler->epilogue_function_id) + { + vkd3d_dxbc_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, phase, "_epilogue"); + vkd3d_dxbc_compiler_emit_shader_epilogue_function(compiler); + } + + memset(®, 0, sizeof(reg)); + reg.idx[1].offset = ~0u; + + /* Fork and join phases share output registers (patch constants). + * Control point phase has separate output registers. */ + memset(compiler->output_info, 0, signature->element_count * sizeof(*compiler->output_info)); + memset(compiler->private_output_variable, 0, sizeof(compiler->private_output_variable)); + memset(compiler->private_output_variable_array_idx, 0, sizeof(compiler->private_output_variable_array_idx)); + memset(compiler->private_output_variable_write_mask, 0, sizeof(compiler->private_output_variable_write_mask)); + + for (i = 0; i < signature->element_count; ++i) + { + const struct vkd3d_shader_signature_element *e = &signature->elements[i]; + + reg.type = VKD3DSPR_OUTPUT; + reg.idx[0].offset = e->register_index; + vkd3d_symbol_make_register(®_symbol, ®); + if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) + { + rb_remove(&compiler->symbol_table, entry); + + symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + + reg.type = VKD3DSPR_OUTCONTROLPOINT; + reg.idx[1].offset = reg.idx[0].offset; + reg.idx[0].offset = compiler->output_control_point_count; + vkd3d_symbol_make_register(symbol, ®); + symbol->info.reg.is_aggregate = false; + + if (rb_put(&compiler->symbol_table, symbol, entry) == -1) + { + ERR("Failed to insert vocp symbol entry (%s).\n", debug_vkd3d_symbol(symbol)); + vkd3d_symbol_free(entry, NULL); + } + } + } + } + + if (phase->instance_count) + { + memset(®, 0, sizeof(reg)); + reg.type = phase->type == VKD3DSIH_HS_FORK_PHASE ? 
VKD3DSPR_FORKINSTID : VKD3DSPR_JOININSTID; + reg.idx[0].offset = ~0u; + reg.idx[1].offset = ~0u; + vkd3d_symbol_make_register(®_symbol, ®); + if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) + { + rb_remove(&compiler->symbol_table, entry); + vkd3d_symbol_free(entry, NULL); + } + } +} + +static void vkd3d_dxbc_compiler_enter_shader_phase(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_phase *previous_phase; + struct vkd3d_shader_phase *phase; + + if ((previous_phase = vkd3d_dxbc_compiler_get_current_shader_phase(compiler))) + vkd3d_dxbc_compiler_leave_shader_phase(compiler, previous_phase); + + if (!vkd3d_array_reserve((void **)&compiler->shader_phases, &compiler->shader_phases_size, + compiler->shader_phase_count + 1, sizeof(*compiler->shader_phases))) + return; + phase = &compiler->shader_phases[compiler->shader_phase_count]; + + phase->type = instruction->handler_idx; + phase->idx = compiler->shader_phase_count; + phase->instance_count = 0; + phase->function_id = 0; + phase->instance_id = 0; + phase->function_location = 0; + + ++compiler->shader_phase_count; +} + +static int vkd3d_dxbc_compiler_emit_shader_phase_instance_count(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_shader_phase *phase = &compiler->shader_phases[compiler->shader_phase_count - 1]; + + if (!compiler->shader_phase_count + || (phase->type != VKD3DSIH_HS_FORK_PHASE && phase->type != VKD3DSIH_HS_JOIN_PHASE) + || phase->function_id) + { + WARN("Unexpected dcl_hs_{fork,join}_phase_instance_count instruction.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + phase->instance_count = instruction->declaration.count; + + vkd3d_dxbc_compiler_begin_shader_phase(compiler, phase); + + return VKD3D_OK; +} + +static const struct vkd3d_shader_phase *vkd3d_dxbc_compiler_get_control_point_phase( + struct vkd3d_dxbc_compiler *compiler) +{ + const struct 
vkd3d_shader_phase *phase; + + if (compiler->shader_phase_count < 1) + return NULL; + + phase = &compiler->shader_phases[0]; + if (is_control_point_phase(phase)) + return phase; + + return NULL; +} + +static void vkd3d_dxbc_compiler_emit_default_control_point_phase(struct vkd3d_dxbc_compiler *compiler) +{ + const struct vkd3d_shader_signature *output_signature = compiler->output_signature; + const struct vkd3d_shader_signature *input_signature = compiler->input_signature; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + enum vkd3d_shader_component_type component_type; + uint32_t input_id, output_id, dst_id, src_id; + struct vkd3d_shader_src_param invocation; + struct vkd3d_shader_register input_reg; + uint32_t type_id, output_ptr_type_id; + unsigned int component_count; + uint32_t invocation_id; + unsigned int i; + + invocation_id = vkd3d_dxbc_compiler_emit_load_invocation_id(compiler); + + memset(&invocation, 0, sizeof(invocation)); + invocation.reg.type = VKD3DSPR_OUTPOINTID; + invocation.reg.data_type = VKD3D_DATA_INT; + invocation.reg.idx[0].offset = ~0u; + invocation.reg.idx[1].offset = ~0u; + invocation.reg.idx[2].offset = ~0u; + invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; + + memset(&input_reg, 0, sizeof(input_reg)); + input_reg.type = VKD3DSPR_INPUT; + input_reg.data_type = VKD3D_DATA_FLOAT; + input_reg.idx[0].rel_addr = &invocation; + input_reg.idx[2].offset = ~0u; + input_id = vkd3d_dxbc_compiler_get_register_id(compiler, &input_reg); + + assert(input_signature->element_count == output_signature->element_count); + for (i = 0; i < output_signature->element_count; ++i) + { + const struct vkd3d_shader_signature_element *output = &output_signature->elements[i]; + const struct vkd3d_shader_signature_element *input = &input_signature->elements[i]; + + assert(input->mask == output->mask); + assert(input->component_type == output->component_type); + + input_reg.idx[1].offset = input->register_index; + input_id = 
vkd3d_dxbc_compiler_get_register_id(compiler, &input_reg); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); + src_id = vkd3d_spirv_build_op_load(builder, type_id, input_id, SpvMemoryAccessMaskNone); + + component_type = output->component_type; + component_count = vkd3d_write_mask_component_count(output->mask); + output_id = vkd3d_dxbc_compiler_emit_array_variable(compiler, &builder->global_stream, + SpvStorageClassOutput, component_type, component_count, compiler->output_control_point_count); + vkd3d_spirv_add_iface_variable(builder, output_id); + vkd3d_spirv_build_op_decorate1(builder, output_id, SpvDecorationLocation, output->register_index); + vkd3d_spirv_build_op_name(builder, output_id, "vocp%u", output->register_index); + + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); + dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_id, invocation_id); + + vkd3d_dxbc_compiler_emit_store(compiler, dst_id, output->mask, + component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_ALL, src_id); + } +} + +static void vkd3d_dxbc_compiler_emit_barrier(struct vkd3d_dxbc_compiler *compiler, + SpvScope execution_scope, SpvScope memory_scope, SpvMemorySemanticsMask semantics) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t execution_id, memory_id, semantics_id; + + memory_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, memory_scope); + semantics_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, semantics); + + if (execution_scope != SpvScopeMax) + { + execution_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, execution_scope); + vkd3d_spirv_build_op_control_barrier(builder, execution_id, memory_id, semantics_id); + } + else + { + vkd3d_spirv_build_op_memory_barrier(builder, memory_id, semantics_id); + } +} + +static void 
vkd3d_dxbc_compiler_emit_hull_shader_barrier(struct vkd3d_dxbc_compiler *compiler) +{ + vkd3d_dxbc_compiler_emit_barrier(compiler, + SpvScopeWorkgroup, SpvScopeInvocation, SpvMemorySemanticsMaskNone); +} + +static void vkd3d_dxbc_compiler_emit_hull_shader_input_initialisation(struct vkd3d_dxbc_compiler *compiler) +{ + uint32_t type_id, length_id, register_index_id, src_array_id, dst_array_id, vicp_id, tmp_id; + const struct vkd3d_shader_signature *signature = compiler->input_signature; + uint32_t src_type_id, dst_type_id, src_id, dst_id, point_index_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_signature_element *element; + enum vkd3d_shader_input_sysval_semantic sysval; + const struct vkd3d_spirv_builtin *builtin; + struct vkd3d_symbol *symbol, symbol_key; + unsigned int register_count, i, j; + struct vkd3d_shader_register r; + struct rb_entry *entry; + uint32_t indices[2]; + + for (i = 0, register_count = 0; i < signature->element_count; ++i) + { + register_count = max(register_count, signature->elements[i].register_index + 1); + } + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, compiler->input_control_point_count); + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); + + memset(&r, 0, sizeof(r)); + r.type = VKD3DSPR_INPUT; + r.idx[0].offset = 0; + r.idx[1].offset = ~0u; + vkd3d_symbol_make_register(&symbol_key, &r); + + for (i = 0; i < signature->element_count; ++i) + { + element = &signature->elements[i]; + + symbol_key.key.reg.idx = element->register_index; + entry = rb_get(&compiler->symbol_table, &symbol_key); + symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + + vicp_id = symbol->id; + register_index_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, element->register_index); + dst_array_id = 
vkd3d_spirv_build_op_in_bounds_access_chain1(builder, type_id, vicp_id, register_index_id); + + if (element->sysval_semantic) + { + sysval = vkd3d_siv_from_sysval(element->sysval_semantic); + builtin = get_spirv_builtin_for_sysval(compiler, sysval); + src_array_id = vkd3d_dxbc_compiler_emit_builtin_variable(compiler, builtin, + SpvStorageClassInput, compiler->input_control_point_count); + + if (builtin->component_count == 4) + { + vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); + } + else + { + tmp_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, builtin->component_count); + src_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, tmp_id); + dst_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, tmp_id); + + for (j = 0; j < compiler->input_control_point_count; ++j) + { + point_index_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, j); + src_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, + src_type_id, src_array_id, point_index_id); + + indices[0] = point_index_id; + indices[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + dst_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, + dst_type_id, dst_array_id, indices, 2); + + vkd3d_spirv_build_op_copy_memory(builder, dst_id, src_id, SpvMemoryAccessMaskNone); + } + } + } + else + { + src_array_id = vkd3d_dxbc_compiler_emit_array_variable(compiler, &builder->global_stream, + SpvStorageClassInput, VKD3D_SHADER_COMPONENT_FLOAT, 4, compiler->input_control_point_count); + vkd3d_spirv_add_iface_variable(builder, src_array_id); + vkd3d_spirv_build_op_decorate1(builder, src_array_id, SpvDecorationLocation, element->register_index); + vkd3d_spirv_build_op_name(builder, src_array_id, "v%u", element->register_index); + + vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); + } + symbol->info.reg.dcl_mask |= element->mask; + } +} + +static void 
vkd3d_dxbc_compiler_emit_shader_epilogue_invocation(struct vkd3d_dxbc_compiler *compiler) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t void_id, type_id, ptr_type_id, function_id; + uint32_t arguments[MAX_REG_OUTPUT]; + unsigned int i, count; + + if ((function_id = compiler->epilogue_function_id)) + { + void_id = vkd3d_spirv_get_op_type_void(builder); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); + for (i = 0, count = 0; i < ARRAY_SIZE(compiler->private_output_variable); ++i) + { + if (compiler->private_output_variable[i]) + { + uint32_t argument_id = compiler->private_output_variable[i]; + unsigned int argument_idx = count++; + + if (compiler->private_output_variable_array_idx[i]) + { + uint32_t tmp_id; + + tmp_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, + argument_id, compiler->private_output_variable_array_idx[i]); + tmp_id = vkd3d_spirv_build_op_load(builder, type_id, tmp_id, SpvMemoryAccessMaskNone); + argument_id = vkd3d_spirv_build_op_variable(builder, + &builder->global_stream, ptr_type_id, SpvStorageClassPrivate, 0); + vkd3d_spirv_build_op_store(builder, argument_id, tmp_id, SpvMemoryAccessMaskNone); + } + + arguments[argument_idx] = argument_id; + } + } + + vkd3d_spirv_build_op_function_call(builder, void_id, function_id, arguments, count); + } +} + +static void vkd3d_dxbc_compiler_emit_hull_shader_main(struct vkd3d_dxbc_compiler *compiler) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_phase *control_point_phase, *phase; + uint32_t phase_instance_id; + unsigned int i, j; + uint32_t void_id; + + vkd3d_spirv_builder_begin_main_function(builder); + + vkd3d_dxbc_compiler_emit_hull_shader_input_initialisation(compiler); + + void_id = vkd3d_spirv_get_op_type_void(builder); + + if ((control_point_phase = 
vkd3d_dxbc_compiler_get_control_point_phase(compiler))) + vkd3d_spirv_build_op_function_call(builder, void_id, control_point_phase->function_id, NULL, 0); + else + vkd3d_dxbc_compiler_emit_default_control_point_phase(compiler); + + if (compiler->use_vocp) + vkd3d_dxbc_compiler_emit_hull_shader_barrier(compiler); + + for (i = 0; i < compiler->shader_phase_count; ++i) + { + phase = &compiler->shader_phases[i]; + if (is_control_point_phase(phase)) + continue; + + if (phase->instance_count) + { + for (j = 0; j < phase->instance_count; ++j) + { + phase_instance_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, j); + vkd3d_spirv_build_op_function_call(builder, + void_id, phase->function_id, &phase_instance_id, 1); + } + } + else + { + vkd3d_spirv_build_op_function_call(builder, void_id, phase->function_id, NULL, 0); + } + } + + vkd3d_dxbc_compiler_emit_shader_epilogue_invocation(compiler); + vkd3d_spirv_build_op_return(builder); + vkd3d_spirv_build_op_function_end(builder); +} + +static SpvOp vkd3d_dxbc_compiler_map_alu_instruction(const struct vkd3d_shader_instruction *instruction) +{ + static const struct + { + enum vkd3d_shader_opcode handler_idx; + SpvOp spirv_op; + } + alu_ops[] = + { + {VKD3DSIH_ADD, SpvOpFAdd}, + {VKD3DSIH_AND, SpvOpBitwiseAnd}, + {VKD3DSIH_BFREV, SpvOpBitReverse}, + {VKD3DSIH_COUNTBITS, SpvOpBitCount}, + {VKD3DSIH_DADD, SpvOpFAdd}, + {VKD3DSIH_DDIV, SpvOpFDiv}, + {VKD3DSIH_DIV, SpvOpFDiv}, + {VKD3DSIH_DMUL, SpvOpFMul}, + {VKD3DSIH_DTOF, SpvOpFConvert}, + {VKD3DSIH_DTOI, SpvOpConvertFToS}, + {VKD3DSIH_DTOU, SpvOpConvertFToU}, + {VKD3DSIH_FTOD, SpvOpFConvert}, + {VKD3DSIH_FTOI, SpvOpConvertFToS}, + {VKD3DSIH_FTOU, SpvOpConvertFToU}, + {VKD3DSIH_IADD, SpvOpIAdd}, + {VKD3DSIH_INEG, SpvOpSNegate}, + {VKD3DSIH_ISHL, SpvOpShiftLeftLogical}, + {VKD3DSIH_ISHR, SpvOpShiftRightArithmetic}, + {VKD3DSIH_ITOD, SpvOpConvertSToF}, + {VKD3DSIH_ITOF, SpvOpConvertSToF}, + {VKD3DSIH_MUL, SpvOpFMul}, + {VKD3DSIH_NOT, SpvOpNot}, + {VKD3DSIH_OR, SpvOpBitwiseOr}, + 
{VKD3DSIH_USHR, SpvOpShiftRightLogical}, + {VKD3DSIH_UTOD, SpvOpConvertUToF}, + {VKD3DSIH_UTOF, SpvOpConvertUToF}, + {VKD3DSIH_XOR, SpvOpBitwiseXor}, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(alu_ops); ++i) + { + if (alu_ops[i].handler_idx == instruction->handler_idx) + return alu_ops[i].spirv_op; + } + + return SpvOpMax; +} + +static void vkd3d_dxbc_compiler_emit_alu_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t src_ids[VKD3D_DXBC_MAX_SOURCE_COUNT]; + uint32_t type_id, val_id; + unsigned int i; + SpvOp op; + + op = vkd3d_dxbc_compiler_map_alu_instruction(instruction); + if (op == SpvOpMax) + { + ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + return; + } + + assert(instruction->dst_count == 1); + assert(instruction->src_count <= VKD3D_DXBC_MAX_SOURCE_COUNT); + + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, dst); + + for (i = 0; i < instruction->src_count; ++i) + src_ids[i] = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[i], dst->write_mask); + + /* The SPIR-V specification states, "The resulting value is undefined if + * Shift is greater than or equal to the bit width of the components of + * Base." Direct3D applies only the lowest 5 bits of the shift. + * + * Microsoft fxc will compile immediate constants larger than 5 bits. + * Fixing up the constants would be more elegant, but the simplest way is + * to let this handle constants too. 
*/ + if (instruction->handler_idx == VKD3DSIH_ISHL || instruction->handler_idx == VKD3DSIH_ISHR + || instruction->handler_idx == VKD3DSIH_USHR) + { + uint32_t mask_id = vkd3d_dxbc_compiler_get_constant_vector(compiler, + VKD3D_SHADER_COMPONENT_UINT, vkd3d_write_mask_component_count(dst->write_mask), 0x1f); + src_ids[1] = vkd3d_spirv_build_op_and(builder, type_id, src_ids[1], mask_id); + } + + val_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, op, type_id, + src_ids, instruction->src_count); + if (instruction->flags & VKD3DSI_PRECISE_XYZW) + vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static enum GLSLstd450 vkd3d_dxbc_compiler_map_ext_glsl_instruction( + const struct vkd3d_shader_instruction *instruction) +{ + static const struct + { + enum vkd3d_shader_opcode handler_idx; + enum GLSLstd450 glsl_inst; + } + glsl_insts[] = + { + {VKD3DSIH_DFMA, GLSLstd450Fma}, + {VKD3DSIH_DMAX, GLSLstd450NMax}, + {VKD3DSIH_DMIN, GLSLstd450NMin}, + {VKD3DSIH_EXP, GLSLstd450Exp2}, + {VKD3DSIH_FIRSTBIT_HI, GLSLstd450FindUMsb}, + {VKD3DSIH_FIRSTBIT_LO, GLSLstd450FindILsb}, + {VKD3DSIH_FIRSTBIT_SHI, GLSLstd450FindSMsb}, + {VKD3DSIH_FRC, GLSLstd450Fract}, + {VKD3DSIH_IMAX, GLSLstd450SMax}, + {VKD3DSIH_IMIN, GLSLstd450SMin}, + {VKD3DSIH_LOG, GLSLstd450Log2}, + {VKD3DSIH_MAD, GLSLstd450Fma}, + {VKD3DSIH_MAX, GLSLstd450NMax}, + {VKD3DSIH_MIN, GLSLstd450NMin}, + {VKD3DSIH_ROUND_NE, GLSLstd450RoundEven}, + {VKD3DSIH_ROUND_NI, GLSLstd450Floor}, + {VKD3DSIH_ROUND_PI, GLSLstd450Ceil}, + {VKD3DSIH_ROUND_Z, GLSLstd450Trunc}, + {VKD3DSIH_RSQ, GLSLstd450InverseSqrt}, + {VKD3DSIH_SQRT, GLSLstd450Sqrt}, + {VKD3DSIH_UMAX, GLSLstd450UMax}, + {VKD3DSIH_UMIN, GLSLstd450UMin}, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(glsl_insts); ++i) + { + if (glsl_insts[i].handler_idx == instruction->handler_idx) + return glsl_insts[i].glsl_inst; + } + + return GLSLstd450Bad; +} + 
+static void vkd3d_dxbc_compiler_emit_ext_glsl_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t src_id[VKD3D_DXBC_MAX_SOURCE_COUNT]; + uint32_t instr_set_id, type_id, val_id; + enum GLSLstd450 glsl_inst; + unsigned int i; + + glsl_inst = vkd3d_dxbc_compiler_map_ext_glsl_instruction(instruction); + if (glsl_inst == GLSLstd450Bad) + { + ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + return; + } + + instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); + + assert(instruction->dst_count == 1); + assert(instruction->src_count <= VKD3D_DXBC_MAX_SOURCE_COUNT); + + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, dst); + + for (i = 0; i < instruction->src_count; ++i) + src_id[i] = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[i], dst->write_mask); + + val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, + instr_set_id, glsl_inst, src_id, instruction->src_count); + + if (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI + || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) + { + /* In D3D bits are numbered from the most significant bit. 
*/ + val_id = vkd3d_spirv_build_op_isub(builder, type_id, + vkd3d_dxbc_compiler_get_constant_uint(compiler, 31), val_id); + } + + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void vkd3d_dxbc_compiler_emit_mov(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_shader_register_info dst_reg_info, src_reg_info; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t val_id, dst_val_id, type_id, dst_id, src_id; + uint32_t components[VKD3D_VEC4_SIZE]; + unsigned int i, component_count; + + if (src->reg.type == VKD3DSPR_IMMCONST || src->reg.type == VKD3DSPR_IMMCONST64 || dst->modifiers || src->modifiers) + goto general_implementation; + + vkd3d_dxbc_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); + vkd3d_dxbc_compiler_get_register_info(compiler, &src->reg, &src_reg_info); + + if (dst_reg_info.component_type != src_reg_info.component_type + || dst_reg_info.write_mask != src_reg_info.write_mask) + goto general_implementation; + + if (vkd3d_swizzle_is_equal(dst_reg_info.write_mask, src->swizzle, src_reg_info.write_mask)) + { + dst_id = vkd3d_dxbc_compiler_get_register_id(compiler, &dst->reg); + src_id = vkd3d_dxbc_compiler_get_register_id(compiler, &src->reg); + + vkd3d_spirv_build_op_copy_memory(builder, dst_id, src_id, SpvMemoryAccessMaskNone); + return; + } + + component_count = vkd3d_write_mask_component_count(dst->write_mask); + if (component_count != 1 && component_count != VKD3D_VEC4_SIZE + && dst_reg_info.write_mask == VKD3DSP_WRITEMASK_ALL) + { + dst_id = vkd3d_dxbc_compiler_get_register_id(compiler, &dst->reg); + src_id = vkd3d_dxbc_compiler_get_register_id(compiler, &src->reg); + + type_id = vkd3d_spirv_get_type_id(builder, dst_reg_info.component_type, VKD3D_VEC4_SIZE); + val_id = vkd3d_spirv_build_op_load(builder, 
type_id, src_id, SpvMemoryAccessMaskNone); + dst_val_id = vkd3d_spirv_build_op_load(builder, type_id, dst_id, SpvMemoryAccessMaskNone); + + for (i = 0; i < ARRAY_SIZE(components); ++i) + { + if (dst->write_mask & (VKD3DSP_WRITEMASK_0 << i)) + components[i] = VKD3D_VEC4_SIZE + vkd3d_swizzle_get_component(src->swizzle, i); + else + components[i] = i; + } + + val_id = vkd3d_spirv_build_op_vector_shuffle(builder, + type_id, dst_val_id, val_id, components, VKD3D_VEC4_SIZE); + + vkd3d_spirv_build_op_store(builder, dst_id, val_id, SpvMemoryAccessMaskNone); + return; + } + +general_implementation: + val_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, dst->write_mask); + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void vkd3d_dxbc_compiler_emit_movc(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t condition_id, src1_id, src2_id, type_id, val_id; + unsigned int component_count; + + condition_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + src1_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[1], dst->write_mask); + src2_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[2], dst->write_mask); + + component_count = vkd3d_write_mask_component_count(dst->write_mask); + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, dst); + + condition_id = vkd3d_dxbc_compiler_emit_int_to_bool(compiler, + VKD3D_SHADER_CONDITIONAL_OP_NZ, component_count, condition_id); + val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src1_id, src2_id); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void vkd3d_dxbc_compiler_emit_swapc(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct 
vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t condition_id, src1_id, src2_id, type_id, val_id; + unsigned int component_count; + + assert(dst[0].write_mask == dst[1].write_mask); + + condition_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + src1_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[1], dst->write_mask); + src2_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[2], dst->write_mask); + + component_count = vkd3d_write_mask_component_count(dst->write_mask); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, component_count); + + condition_id = vkd3d_dxbc_compiler_emit_int_to_bool(compiler, + VKD3D_SHADER_CONDITIONAL_OP_NZ, component_count, condition_id); + + val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src2_id, src1_id); + vkd3d_dxbc_compiler_emit_store_dst(compiler, &dst[0], val_id); + val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src1_id, src2_id); + vkd3d_dxbc_compiler_emit_store_dst(compiler, &dst[1], val_id); +} + +static void vkd3d_dxbc_compiler_emit_dot(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + enum vkd3d_shader_component_type component_type; + uint32_t type_id, val_id, src_ids[2]; + unsigned int component_count, i; + DWORD write_mask; + + component_count = vkd3d_write_mask_component_count(dst->write_mask); + component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); + + if (instruction->handler_idx == VKD3DSIH_DP4) + write_mask = VKD3DSP_WRITEMASK_ALL; + else if (instruction->handler_idx == VKD3DSIH_DP3) + write_mask = VKD3DSP_WRITEMASK_0 | 
VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_2; + else + write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; + + assert(instruction->src_count == ARRAY_SIZE(src_ids)); + for (i = 0; i < ARRAY_SIZE(src_ids); ++i) + src_ids[i] = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[i], write_mask); + + type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + + val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpDot, type_id, src_ids[0], src_ids[1]); + if (component_count > 1) + { + val_id = vkd3d_dxbc_compiler_emit_construct_vector(compiler, + component_type, component_count, val_id, 0, 1); + } + if (instruction->flags & VKD3DSI_PRECISE_XYZW) + vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void vkd3d_dxbc_compiler_emit_rcp(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, src_id, val_id, div_id; + unsigned int component_count; + + component_count = vkd3d_write_mask_component_count(dst->write_mask); + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, dst); + + src_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, dst->write_mask); + if (src->reg.data_type == VKD3D_DATA_DOUBLE) + div_id = vkd3d_dxbc_compiler_get_constant_double_vector(compiler, 1.0, component_count); + else + div_id = vkd3d_dxbc_compiler_get_constant_float_vector(compiler, 1.0f, component_count); + val_id = vkd3d_spirv_build_op_fdiv(builder, type_id, div_id, src_id); + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void vkd3d_dxbc_compiler_emit_sincos(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct 
vkd3d_shader_dst_param *dst_sin = &instruction->dst[0]; + const struct vkd3d_shader_dst_param *dst_cos = &instruction->dst[1]; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, src_id, sin_id = 0, cos_id = 0; + + if (dst_sin->reg.type != VKD3DSPR_NULL) + { + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, dst_sin); + src_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, dst_sin->write_mask); + + sin_id = vkd3d_spirv_build_op_glsl_std450_sin(builder, type_id, src_id); + } + + if (dst_cos->reg.type != VKD3DSPR_NULL) + { + if (dst_sin->reg.type == VKD3DSPR_NULL || dst_cos->write_mask != dst_sin->write_mask) + { + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, dst_cos); + src_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, dst_cos->write_mask); + } + + cos_id = vkd3d_spirv_build_op_glsl_std450_cos(builder, type_id, src_id); + } + + if (sin_id) + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst_sin, sin_id); + + if (cos_id) + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst_cos, cos_id); +} + +static void vkd3d_dxbc_compiler_emit_imul(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, val_id, src0_id, src1_id; + + if (dst[0].reg.type != VKD3DSPR_NULL) + FIXME("Extended multiplies not implemented.\n"); /* SpvOpSMulExtended */ + + if (dst[1].reg.type == VKD3DSPR_NULL) + return; + + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, &dst[1]); + + src0_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], dst[1].write_mask); + src1_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[1], dst[1].write_mask); + + val_id = vkd3d_spirv_build_op_imul(builder, type_id, src0_id, 
src1_id); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, &dst[1], val_id); +} + +static void vkd3d_dxbc_compiler_emit_imad(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, val_id, src_ids[3]; + unsigned int i, component_count; + + component_count = vkd3d_write_mask_component_count(dst->write_mask); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, component_count); + + for (i = 0; i < ARRAY_SIZE(src_ids); ++i) + src_ids[i] = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[i], dst->write_mask); + + val_id = vkd3d_spirv_build_op_imul(builder, type_id, src_ids[0], src_ids[1]); + val_id = vkd3d_spirv_build_op_iadd(builder, type_id, val_id, src_ids[2]); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void vkd3d_dxbc_compiler_emit_udiv(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t type_id, val_id, src0_id, src1_id, condition_id, uint_max_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + unsigned int component_count = 0; + + if (dst[0].reg.type != VKD3DSPR_NULL) + { + component_count = vkd3d_write_mask_component_count(dst[0].write_mask); + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, &dst[0]); + + src0_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], dst[0].write_mask); + src1_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[1], dst[0].write_mask); + + condition_id = vkd3d_dxbc_compiler_emit_int_to_bool(compiler, + VKD3D_SHADER_CONDITIONAL_OP_NZ, component_count, src1_id); + uint_max_id = 
vkd3d_dxbc_compiler_get_constant_uint_vector(compiler, + 0xffffffff, component_count); + + val_id = vkd3d_spirv_build_op_udiv(builder, type_id, src0_id, src1_id); + /* The SPIR-V spec says: "The resulting value is undefined if Operand 2 is 0." */ + val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, val_id, uint_max_id); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, &dst[0], val_id); + } + + if (dst[1].reg.type != VKD3DSPR_NULL) + { + if (!component_count || dst[0].write_mask != dst[1].write_mask) + { + component_count = vkd3d_write_mask_component_count(dst[1].write_mask); + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, &dst[1]); + + src0_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], dst[1].write_mask); + src1_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[1], dst[1].write_mask); + + condition_id = vkd3d_dxbc_compiler_emit_int_to_bool(compiler, + VKD3D_SHADER_CONDITIONAL_OP_NZ, component_count, src1_id); + uint_max_id = vkd3d_dxbc_compiler_get_constant_uint_vector(compiler, + 0xffffffff, component_count); + } + + val_id = vkd3d_spirv_build_op_umod(builder, type_id, src0_id, src1_id); + /* The SPIR-V spec says: "The resulting value is undefined if Operand 2 is 0." 
*/ + val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, val_id, uint_max_id); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, &dst[1], val_id); + } +} + +static void vkd3d_dxbc_compiler_emit_bitfield_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t src_ids[4], constituents[VKD3D_VEC4_SIZE], type_id, mask_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + enum vkd3d_shader_component_type component_type; + unsigned int i, j, k, src_count; + DWORD write_mask; + SpvOp op; + + src_count = instruction->src_count; + assert(2 <= src_count && src_count <= ARRAY_SIZE(src_ids)); + + component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); + type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + mask_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0x1f); + + switch (instruction->handler_idx) + { + case VKD3DSIH_BFI: op = SpvOpBitFieldInsert; break; + case VKD3DSIH_IBFE: op = SpvOpBitFieldSExtract; break; + case VKD3DSIH_UBFE: op = SpvOpBitFieldUExtract; break; + default: + ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + return; + } + + assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, k = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) + continue; + + for (j = 0; j < src_count; ++j) + { + src_ids[src_count - j - 1] = vkd3d_dxbc_compiler_emit_load_src_with_type(compiler, + &src[j], write_mask, component_type); + } + + /* In SPIR-V, the last two operands are Offset and Count. 
*/ + for (j = src_count - 2; j < src_count; ++j) + { + src_ids[j] = vkd3d_spirv_build_op_and(builder, type_id, src_ids[j], mask_id); + } + + constituents[k++] = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, + op, type_id, src_ids, src_count); + } + + vkd3d_dxbc_compiler_emit_store_dst_components(compiler, dst, component_type, constituents); +} + +static void vkd3d_dxbc_compiler_emit_f16tof32(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t instr_set_id, type_id, scalar_type_id, src_id, result_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t components[VKD3D_VEC4_SIZE]; + unsigned int i, j; + DWORD write_mask; + + instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); + scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 1); + + /* FIXME: Consider a single UnpackHalf2x16 intruction per 2 components. 
*/ + assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) + continue; + + src_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, write_mask); + result_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, + instr_set_id, GLSLstd450UnpackHalf2x16, &src_id, 1); + components[j++] = vkd3d_spirv_build_op_composite_extract1(builder, + scalar_type_id, result_id, 0); + } + + vkd3d_dxbc_compiler_emit_store_dst_components(compiler, + dst, vkd3d_component_type_from_data_type(dst->reg.data_type), components); +} + +static void vkd3d_dxbc_compiler_emit_f32tof16(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t instr_set_id, type_id, scalar_type_id, src_id, zero_id, constituents[2]; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t components[VKD3D_VEC4_SIZE]; + unsigned int i, j; + DWORD write_mask; + + instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); + scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + zero_id = vkd3d_dxbc_compiler_get_constant_float(compiler, 0.0f); + + /* FIXME: Consider a single PackHalf2x16 intruction per 2 components. 
*/ + assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) + continue; + + src_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, write_mask); + constituents[0] = src_id; + constituents[1] = zero_id; + src_id = vkd3d_spirv_build_op_composite_construct(builder, + type_id, constituents, ARRAY_SIZE(constituents)); + components[j++] = vkd3d_spirv_build_op_ext_inst(builder, scalar_type_id, + instr_set_id, GLSLstd450PackHalf2x16, &src_id, 1); + } + + vkd3d_dxbc_compiler_emit_store_dst_components(compiler, + dst, vkd3d_component_type_from_data_type(dst->reg.data_type), components); +} + +static void vkd3d_dxbc_compiler_emit_comparison_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t src0_id, src1_id, type_id, result_id; + unsigned int component_count; + SpvOp op; + + switch (instruction->handler_idx) + { + case VKD3DSIH_DEQ: + case VKD3DSIH_EQ: op = SpvOpFOrdEqual; break; + case VKD3DSIH_DGE: + case VKD3DSIH_GE: op = SpvOpFOrdGreaterThanEqual; break; + case VKD3DSIH_IEQ: op = SpvOpIEqual; break; + case VKD3DSIH_IGE: op = SpvOpSGreaterThanEqual; break; + case VKD3DSIH_ILT: op = SpvOpSLessThan; break; + case VKD3DSIH_INE: op = SpvOpINotEqual; break; + case VKD3DSIH_DLT: + case VKD3DSIH_LT: op = SpvOpFOrdLessThan; break; + case VKD3DSIH_DNE: + case VKD3DSIH_NE: op = SpvOpFUnordNotEqual; break; + case VKD3DSIH_UGE: op = SpvOpUGreaterThanEqual; break; + case VKD3DSIH_ULT: op = SpvOpULessThan; break; + default: + ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + return; + } + + component_count = vkd3d_write_mask_component_count(dst->write_mask); + + src0_id = 
vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + src1_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[1], dst->write_mask); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + op, type_id, src0_id, src1_id); + + result_id = vkd3d_dxbc_compiler_emit_bool_to_int(compiler, component_count, result_id); + vkd3d_dxbc_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); +} + +static uint32_t vkd3d_dxbc_compiler_emit_conditional_branch(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction, uint32_t target_block_id) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t condition_id, merge_block_id; + + condition_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); + condition_id = vkd3d_dxbc_compiler_emit_int_to_bool(compiler, instruction->flags, 1, condition_id); + + merge_block_id = vkd3d_spirv_alloc_id(builder); + + vkd3d_spirv_build_op_selection_merge(builder, merge_block_id, SpvSelectionControlMaskNone); + vkd3d_spirv_build_op_branch_conditional(builder, condition_id, target_block_id, merge_block_id); + + return merge_block_id; +} + +static void vkd3d_dxbc_compiler_emit_return(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_phase *phase = vkd3d_dxbc_compiler_get_current_shader_phase(compiler); + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (!phase || is_control_point_phase(phase))) + vkd3d_dxbc_compiler_emit_shader_epilogue_invocation(compiler); + + vkd3d_spirv_build_op_return(builder); +} + +static void vkd3d_dxbc_compiler_emit_retc(struct vkd3d_dxbc_compiler *compiler, + const struct 
vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t target_id, merge_block_id; + + target_id = vkd3d_spirv_alloc_id(builder); + merge_block_id = vkd3d_dxbc_compiler_emit_conditional_branch(compiler, instruction, target_id); + + vkd3d_spirv_build_op_label(builder, target_id); + vkd3d_dxbc_compiler_emit_return(compiler, instruction); + vkd3d_spirv_build_op_label(builder, merge_block_id); +} + +static void vkd3d_dxbc_compiler_emit_kill(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t target_id, merge_block_id; + + target_id = vkd3d_spirv_alloc_id(builder); + merge_block_id = vkd3d_dxbc_compiler_emit_conditional_branch(compiler, instruction, target_id); + + vkd3d_spirv_build_op_label(builder, target_id); + + if (vkd3d_dxbc_compiler_is_target_extension_supported(compiler, + VKD3D_SHADER_SPIRV_EXTENSION_EXT_DEMOTE_TO_HELPER_INVOCATION)) + { + vkd3d_spirv_enable_capability(builder, SpvCapabilityDemoteToHelperInvocationEXT); + vkd3d_spirv_build_op_demote_to_helper_invocation(builder); + vkd3d_spirv_build_op_branch(builder, merge_block_id); + } + else + { + vkd3d_spirv_build_op_kill(builder); + } + + vkd3d_spirv_build_op_label(builder, merge_block_id); +} + +static struct vkd3d_control_flow_info *vkd3d_dxbc_compiler_push_control_flow_level( + struct vkd3d_dxbc_compiler *compiler) +{ + if (!vkd3d_array_reserve((void **)&compiler->control_flow_info, &compiler->control_flow_info_size, + compiler->control_flow_depth + 1, sizeof(*compiler->control_flow_info))) + { + ERR("Failed to allocate control flow info structure.\n"); + return NULL; + } + + return &compiler->control_flow_info[compiler->control_flow_depth++]; +} + +static void vkd3d_dxbc_compiler_pop_control_flow_level(struct vkd3d_dxbc_compiler *compiler) +{ + struct vkd3d_control_flow_info *cf_info; + + 
assert(compiler->control_flow_depth); + + cf_info = &compiler->control_flow_info[--compiler->control_flow_depth]; + memset(cf_info, 0, sizeof(*cf_info)); +} + +static struct vkd3d_control_flow_info *vkd3d_dxbc_compiler_find_innermost_loop( + struct vkd3d_dxbc_compiler *compiler) +{ + int depth; + + for (depth = compiler->control_flow_depth - 1; depth >= 0; --depth) + { + if (compiler->control_flow_info[depth].current_block == VKD3D_BLOCK_LOOP) + return &compiler->control_flow_info[depth]; + } + + return NULL; +} + +static struct vkd3d_control_flow_info *vkd3d_dxbc_compiler_find_innermost_breakable_cf_construct( + struct vkd3d_dxbc_compiler *compiler) +{ + int depth; + + for (depth = compiler->control_flow_depth - 1; depth >= 0; --depth) + { + if (compiler->control_flow_info[depth].current_block == VKD3D_BLOCK_LOOP + || compiler->control_flow_info[depth].current_block == VKD3D_BLOCK_SWITCH) + return &compiler->control_flow_info[depth]; + } + + return NULL; +} + +static int vkd3d_dxbc_compiler_emit_control_flow_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t loop_header_block_id, loop_body_block_id, continue_block_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t merge_block_id, val_id, condition_id, true_label; + struct vkd3d_control_flow_info *cf_info; + + cf_info = compiler->control_flow_depth + ? 
&compiler->control_flow_info[compiler->control_flow_depth - 1] : NULL; + + switch (instruction->handler_idx) + { + case VKD3DSIH_IF: + if (!(cf_info = vkd3d_dxbc_compiler_push_control_flow_level(compiler))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + val_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); + condition_id = vkd3d_dxbc_compiler_emit_int_to_bool(compiler, instruction->flags, 1, val_id); + + true_label = vkd3d_spirv_alloc_id(builder); + merge_block_id = vkd3d_spirv_alloc_id(builder); + vkd3d_spirv_build_op_selection_merge(builder, merge_block_id, SpvSelectionControlMaskNone); + cf_info->u.if_.stream_location = vkd3d_spirv_stream_current_location(&builder->function_stream); + vkd3d_spirv_build_op_branch_conditional(builder, condition_id, true_label, merge_block_id); + + vkd3d_spirv_build_op_label(builder, true_label); + + cf_info->u.if_.id = compiler->branch_id; + cf_info->u.if_.merge_block_id = merge_block_id; + cf_info->u.if_.else_block_id = 0; + cf_info->inside_block = true; + cf_info->current_block = VKD3D_BLOCK_IF; + + vkd3d_spirv_build_op_name(builder, merge_block_id, "branch%u_merge", compiler->branch_id); + vkd3d_spirv_build_op_name(builder, true_label, "branch%u_true", compiler->branch_id); + ++compiler->branch_id; + break; + + case VKD3DSIH_ELSE: + assert(compiler->control_flow_depth); + assert(cf_info->current_block == VKD3D_BLOCK_IF); + + if (cf_info->inside_block) + vkd3d_spirv_build_op_branch(builder, cf_info->u.if_.merge_block_id); + + cf_info->u.if_.else_block_id = vkd3d_spirv_alloc_id(builder); + vkd3d_spirv_as_op_branch_conditional(&builder->function_stream, + cf_info->u.if_.stream_location)->false_label = cf_info->u.if_.else_block_id; + vkd3d_spirv_build_op_name(builder, + cf_info->u.if_.else_block_id, "branch%u_false", cf_info->u.if_.id); + vkd3d_spirv_build_op_label(builder, cf_info->u.if_.else_block_id); + cf_info->inside_block = true; + break; + + case VKD3DSIH_ENDIF: + assert(compiler->control_flow_depth); + 
assert(cf_info->current_block == VKD3D_BLOCK_IF); + + if (cf_info->inside_block) + vkd3d_spirv_build_op_branch(builder, cf_info->u.if_.merge_block_id); + + vkd3d_spirv_build_op_label(builder, cf_info->u.if_.merge_block_id); + + vkd3d_dxbc_compiler_pop_control_flow_level(compiler); + break; + + case VKD3DSIH_LOOP: + if (!(cf_info = vkd3d_dxbc_compiler_push_control_flow_level(compiler))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + loop_header_block_id = vkd3d_spirv_alloc_id(builder); + loop_body_block_id = vkd3d_spirv_alloc_id(builder); + continue_block_id = vkd3d_spirv_alloc_id(builder); + merge_block_id = vkd3d_spirv_alloc_id(builder); + + vkd3d_spirv_build_op_branch(builder, loop_header_block_id); + vkd3d_spirv_build_op_label(builder, loop_header_block_id); + vkd3d_spirv_build_op_loop_merge(builder, merge_block_id, continue_block_id, SpvLoopControlMaskNone); + vkd3d_spirv_build_op_branch(builder, loop_body_block_id); + + vkd3d_spirv_build_op_label(builder, loop_body_block_id); + + cf_info->u.loop.header_block_id = loop_header_block_id; + cf_info->u.loop.continue_block_id = continue_block_id; + cf_info->u.loop.merge_block_id = merge_block_id; + cf_info->current_block = VKD3D_BLOCK_LOOP; + cf_info->inside_block = true; + + vkd3d_spirv_build_op_name(builder, loop_header_block_id, "loop%u_header", compiler->loop_id); + vkd3d_spirv_build_op_name(builder, loop_body_block_id, "loop%u_body", compiler->loop_id); + vkd3d_spirv_build_op_name(builder, continue_block_id, "loop%u_continue", compiler->loop_id); + vkd3d_spirv_build_op_name(builder, merge_block_id, "loop%u_merge", compiler->loop_id); + ++compiler->loop_id; + break; + + case VKD3DSIH_ENDLOOP: + assert(compiler->control_flow_depth); + assert(cf_info->current_block == VKD3D_BLOCK_LOOP); + + /* The loop block may have already been ended by an unconditional + * break instruction right before the end of the loop. 
*/ + if (cf_info->inside_block) + vkd3d_spirv_build_op_branch(builder, cf_info->u.loop.continue_block_id); + + vkd3d_spirv_build_op_label(builder, cf_info->u.loop.continue_block_id); + vkd3d_spirv_build_op_branch(builder, cf_info->u.loop.header_block_id); + vkd3d_spirv_build_op_label(builder, cf_info->u.loop.merge_block_id); + + vkd3d_dxbc_compiler_pop_control_flow_level(compiler); + break; + + case VKD3DSIH_SWITCH: + if (!(cf_info = vkd3d_dxbc_compiler_push_control_flow_level(compiler))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + merge_block_id = vkd3d_spirv_alloc_id(builder); + + assert(src->reg.data_type == VKD3D_DATA_INT); + val_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); + + vkd3d_spirv_build_op_selection_merge(builder, merge_block_id, SpvSelectionControlMaskNone); + + cf_info->u.switch_.id = compiler->switch_id; + cf_info->u.switch_.merge_block_id = merge_block_id; + cf_info->u.switch_.stream_location = vkd3d_spirv_stream_current_location(&builder->function_stream); + cf_info->u.switch_.selector_id = val_id; + cf_info->u.switch_.case_blocks = NULL; + cf_info->u.switch_.case_blocks_size = 0; + cf_info->u.switch_.case_block_count = 0; + cf_info->u.switch_.default_block_id = 0; + cf_info->inside_block = false; + cf_info->current_block = VKD3D_BLOCK_SWITCH; + + vkd3d_spirv_build_op_name(builder, merge_block_id, "switch%u_merge", compiler->switch_id); + + ++compiler->switch_id; + + if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.case_blocks, &cf_info->u.switch_.case_blocks_size, + 10, sizeof(*cf_info->u.switch_.case_blocks))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + break; + + case VKD3DSIH_ENDSWITCH: + assert(compiler->control_flow_depth); + assert(cf_info->current_block == VKD3D_BLOCK_SWITCH); + assert(!cf_info->inside_block); + + if (!cf_info->u.switch_.default_block_id) + cf_info->u.switch_.default_block_id = cf_info->u.switch_.merge_block_id; + + vkd3d_spirv_build_op_label(builder, cf_info->u.switch_.merge_block_id); + + /* 
The OpSwitch instruction is inserted when the endswitch + * instruction is processed because we do not know the number + * of case statements in advance.*/ + vkd3d_spirv_begin_function_stream_insertion(builder, cf_info->u.switch_.stream_location); + vkd3d_spirv_build_op_switch(builder, cf_info->u.switch_.selector_id, + cf_info->u.switch_.default_block_id, cf_info->u.switch_.case_blocks, + cf_info->u.switch_.case_block_count); + vkd3d_spirv_end_function_stream_insertion(builder); + + vkd3d_free(cf_info->u.switch_.case_blocks); + vkd3d_dxbc_compiler_pop_control_flow_level(compiler); + break; + + case VKD3DSIH_CASE: + { + uint32_t label_id, value; + + assert(compiler->control_flow_depth); + assert(cf_info->current_block == VKD3D_BLOCK_SWITCH); + + assert(src->swizzle == VKD3D_SHADER_NO_SWIZZLE && src->reg.type == VKD3DSPR_IMMCONST); + value = *src->reg.u.immconst_uint; + + if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.case_blocks, &cf_info->u.switch_.case_blocks_size, + 2 * (cf_info->u.switch_.case_block_count + 1), sizeof(*cf_info->u.switch_.case_blocks))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + label_id = vkd3d_spirv_alloc_id(builder); + if (cf_info->inside_block) /* fall-through */ + vkd3d_spirv_build_op_branch(builder, label_id); + + cf_info->u.switch_.case_blocks[2 * cf_info->u.switch_.case_block_count + 0] = value; + cf_info->u.switch_.case_blocks[2 * cf_info->u.switch_.case_block_count + 1] = label_id; + ++cf_info->u.switch_.case_block_count; + + vkd3d_spirv_build_op_label(builder, label_id); + cf_info->inside_block = true; + vkd3d_spirv_build_op_name(builder, label_id, "switch%u_case%u", cf_info->u.switch_.id, value); + break; + } + + case VKD3DSIH_DEFAULT: + assert(compiler->control_flow_depth); + assert(cf_info->current_block == VKD3D_BLOCK_SWITCH); + assert(!cf_info->u.switch_.default_block_id); + + cf_info->u.switch_.default_block_id = vkd3d_spirv_alloc_id(builder); + if (cf_info->inside_block) /* fall-through */ + 
vkd3d_spirv_build_op_branch(builder, cf_info->u.switch_.default_block_id); + + vkd3d_spirv_build_op_label(builder, cf_info->u.switch_.default_block_id); + vkd3d_spirv_build_op_name(builder, cf_info->u.switch_.default_block_id, + "switch%u_default", cf_info->u.switch_.id); + cf_info->inside_block = true; + break; + + case VKD3DSIH_BREAK: + { + struct vkd3d_control_flow_info *breakable_cf_info; + + assert(compiler->control_flow_depth); + + if (!(breakable_cf_info = vkd3d_dxbc_compiler_find_innermost_breakable_cf_construct(compiler))) + { + FIXME("Unhandled break instruction.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (breakable_cf_info->current_block == VKD3D_BLOCK_LOOP) + { + vkd3d_spirv_build_op_branch(builder, breakable_cf_info->u.loop.merge_block_id); + } + else if (breakable_cf_info->current_block == VKD3D_BLOCK_SWITCH) + { + /* The current case block may have already been ended by an + * unconditional continue instruction. */ + if (breakable_cf_info->inside_block) + vkd3d_spirv_build_op_branch(builder, breakable_cf_info->u.switch_.merge_block_id); + } + + cf_info->inside_block = false; + break; + } + + case VKD3DSIH_BREAKP: + { + struct vkd3d_control_flow_info *loop_cf_info; + + assert(compiler->control_flow_depth); + + if (!(loop_cf_info = vkd3d_dxbc_compiler_find_innermost_loop(compiler))) + { + ERR("Invalid 'breakc' instruction outside loop.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + merge_block_id = vkd3d_dxbc_compiler_emit_conditional_branch(compiler, + instruction, loop_cf_info->u.loop.merge_block_id); + vkd3d_spirv_build_op_label(builder, merge_block_id); + break; + } + + case VKD3DSIH_CONTINUE: + { + struct vkd3d_control_flow_info *loop_cf_info; + + assert(compiler->control_flow_depth); + + if (!(loop_cf_info = vkd3d_dxbc_compiler_find_innermost_loop(compiler))) + { + ERR("Invalid 'continue' instruction outside loop.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + vkd3d_spirv_build_op_branch(builder, 
loop_cf_info->u.loop.continue_block_id); + + cf_info->inside_block = false; + break; + } + + case VKD3DSIH_CONTINUEP: + { + struct vkd3d_control_flow_info *loop_cf_info; + + if (!(loop_cf_info = vkd3d_dxbc_compiler_find_innermost_loop(compiler))) + { + ERR("Invalid 'continuec' instruction outside loop.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + merge_block_id = vkd3d_dxbc_compiler_emit_conditional_branch(compiler, + instruction, loop_cf_info->u.loop.continue_block_id); + vkd3d_spirv_build_op_label(builder, merge_block_id); + break; + } + + case VKD3DSIH_RET: + vkd3d_dxbc_compiler_emit_return(compiler, instruction); + + if (cf_info) + cf_info->inside_block = false; + break; + + case VKD3DSIH_RETP: + vkd3d_dxbc_compiler_emit_retc(compiler, instruction); + break; + + case VKD3DSIH_TEXKILL: + vkd3d_dxbc_compiler_emit_kill(compiler, instruction); + break; + + default: + ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + break; + } + + return VKD3D_OK; +} + +static void vkd3d_dxbc_compiler_emit_deriv_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct instruction_info *info; + uint32_t type_id, src_id, val_id; + unsigned int i; + + static const struct instruction_info + { + enum vkd3d_shader_opcode handler_idx; + SpvOp op; + bool needs_derivative_control; + } + deriv_instructions[] = + { + {VKD3DSIH_DSX, SpvOpDPdx}, + {VKD3DSIH_DSX_COARSE, SpvOpDPdxCoarse, true}, + {VKD3DSIH_DSX_FINE, SpvOpDPdxFine, true}, + {VKD3DSIH_DSY, SpvOpDPdy}, + {VKD3DSIH_DSY_COARSE, SpvOpDPdyCoarse, true}, + {VKD3DSIH_DSY_FINE, SpvOpDPdyFine, true}, + }; + + info = NULL; + for (i = 0; i < ARRAY_SIZE(deriv_instructions); ++i) + { + if (deriv_instructions[i].handler_idx == instruction->handler_idx) + { + info = 
&deriv_instructions[i]; + break; + } + } + if (!info) + { + ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + return; + } + + if (info->needs_derivative_control) + vkd3d_spirv_enable_capability(builder, SpvCapabilityDerivativeControl); + + assert(instruction->dst_count == 1); + assert(instruction->src_count == 1); + + type_id = vkd3d_dxbc_compiler_get_type_id_for_dst(compiler, dst); + src_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src, dst->write_mask); + val_id = vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, info->op, type_id, src_id); + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +struct vkd3d_shader_image +{ + uint32_t id; + uint32_t image_id; + uint32_t sampled_image_id; + + enum vkd3d_shader_component_type sampled_type; + uint32_t image_type_id; + const struct vkd3d_spirv_resource_type *resource_type_info; + unsigned int structure_stride; + bool raw; +}; + +#define VKD3D_IMAGE_FLAG_NONE 0x0 +#define VKD3D_IMAGE_FLAG_DEPTH 0x1 +#define VKD3D_IMAGE_FLAG_NO_LOAD 0x2 +#define VKD3D_IMAGE_FLAG_SAMPLED 0x4 + +static const struct vkd3d_symbol *vkd3d_dxbc_compiler_find_resource(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *resource_reg) +{ + struct vkd3d_symbol resource_key; + struct rb_entry *entry; + + vkd3d_symbol_make_resource(&resource_key, resource_reg); + entry = rb_get(&compiler->symbol_table, &resource_key); + assert(entry); + return RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); +} + +static const struct vkd3d_symbol *vkd3d_dxbc_compiler_find_combined_sampler(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_register *resource_reg, const struct vkd3d_shader_register *sampler_reg) +{ + const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; + unsigned int sampler_space, sampler_index; + struct vkd3d_symbol key; + struct rb_entry *entry; + + if (!shader_interface->combined_sampler_count) + return NULL; + + if 
(sampler_reg) + { + const struct vkd3d_symbol *sampler_symbol; + + vkd3d_symbol_make_sampler(&key, sampler_reg); + if (!(entry = rb_get(&compiler->symbol_table, &key))) + return NULL; + sampler_symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + sampler_space = sampler_symbol->info.sampler.range.space; + sampler_index = sampler_symbol->info.sampler.range.first; + } + else + { + sampler_space = 0; + sampler_index = VKD3D_SHADER_DUMMY_SAMPLER_INDEX; + } + + vkd3d_symbol_make_combined_sampler(&key, resource_reg, sampler_space, sampler_index); + if ((entry = rb_get(&compiler->symbol_table, &key))) + return RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + return NULL; +} + +static void vkd3d_dxbc_compiler_prepare_image(struct vkd3d_dxbc_compiler *compiler, + struct vkd3d_shader_image *image, const struct vkd3d_shader_register *resource_reg, + const struct vkd3d_shader_register *sampler_reg, unsigned int flags) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t sampler_var_id, sampler_id, sampled_image_type_id; + const struct vkd3d_symbol *symbol = NULL; + bool load, sampled, depth_comparison; + + load = !(flags & VKD3D_IMAGE_FLAG_NO_LOAD); + sampled = flags & VKD3D_IMAGE_FLAG_SAMPLED; + depth_comparison = flags & VKD3D_IMAGE_FLAG_DEPTH; + + if (resource_reg->type == VKD3DSPR_RESOURCE) + symbol = vkd3d_dxbc_compiler_find_combined_sampler(compiler, resource_reg, sampler_reg); + if (!symbol) + symbol = vkd3d_dxbc_compiler_find_resource(compiler, resource_reg); + + if (symbol->descriptor_array) + { + const struct vkd3d_symbol_descriptor_array_data *array_data = &symbol->descriptor_array->info.descriptor_array; + uint32_t ptr_type_id, index_id; + + index_id = vkd3d_dxbc_compiler_get_descriptor_index(compiler, resource_reg, symbol->descriptor_array, + symbol->info.resource.binding_base_idx, symbol->info.resource.resource_type_info->resource_type); + + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, array_data->storage_class, + 
array_data->contained_type_id); + image->image_type_id = array_data->contained_type_id; + + image->id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, symbol->id, &index_id, 1); + } + else + { + image->id = symbol->id; + image->image_type_id = symbol->info.resource.type_id; + } + image->sampled_type = symbol->info.resource.sampled_type; + image->resource_type_info = symbol->info.resource.resource_type_info; + image->structure_stride = symbol->info.resource.structure_stride; + image->raw = symbol->info.resource.raw; + + if (symbol->type == VKD3D_SYMBOL_COMBINED_SAMPLER) + { + sampled_image_type_id = vkd3d_spirv_get_op_type_sampled_image(builder, image->image_type_id); + image->sampled_image_id = vkd3d_spirv_build_op_load(builder, + sampled_image_type_id, image->id, SpvMemoryAccessMaskNone); + image->image_id = !sampled ? vkd3d_spirv_build_op_image(builder, + image->image_type_id, image->sampled_image_id) : 0; + return; + } + + if (load) + { + image->image_id = vkd3d_spirv_build_op_load(builder, image->image_type_id, image->id, SpvMemoryAccessMaskNone); + if (resource_reg->non_uniform) + vkd3d_dxbc_compiler_decorate_nonuniform(compiler, image->image_id); + } + else + { + image->image_id = 0; + } + + image->image_type_id = vkd3d_dxbc_compiler_get_image_type_id(compiler, resource_reg, + &symbol->info.resource.range, image->resource_type_info, + image->sampled_type, image->structure_stride || image->raw, depth_comparison); + + if (sampled) + { + struct vkd3d_shader_register_info register_info; + + assert(image->image_id); + assert(sampler_reg); + + if (!vkd3d_dxbc_compiler_get_register_info(compiler, sampler_reg, ®ister_info)) + ERR("Failed to get sampler register info.\n"); + sampler_var_id = register_info.id; + if (register_info.descriptor_array) + { + const struct vkd3d_symbol_descriptor_array_data *array_data + = ®ister_info.descriptor_array->info.descriptor_array; + uint32_t ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, + 
register_info.storage_class, array_data->contained_type_id); + uint32_t array_idx = vkd3d_dxbc_compiler_get_descriptor_index(compiler, sampler_reg, + register_info.descriptor_array, register_info.binding_base_idx, VKD3D_SHADER_RESOURCE_NONE); + sampler_var_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, register_info.id, &array_idx, 1); + } + + sampler_id = vkd3d_spirv_build_op_load(builder, + vkd3d_spirv_get_op_type_sampler(builder), sampler_var_id, SpvMemoryAccessMaskNone); + if (sampler_reg->non_uniform) + vkd3d_dxbc_compiler_decorate_nonuniform(compiler, sampler_id); + + sampled_image_type_id = vkd3d_spirv_get_op_type_sampled_image(builder, image->image_type_id); + image->sampled_image_id = vkd3d_spirv_build_op_sampled_image(builder, + sampled_image_type_id, image->image_id, sampler_id); + if (resource_reg->non_uniform) + vkd3d_dxbc_compiler_decorate_nonuniform(compiler, image->sampled_image_id); + } + else + { + image->sampled_image_id = 0; + } +} + +static uint32_t vkd3d_dxbc_compiler_emit_texel_offset(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction, + const struct vkd3d_spirv_resource_type *resource_type_info) +{ + const struct vkd3d_shader_texel_offset *offset = &instruction->texel_offset; + unsigned int component_count = resource_type_info->offset_component_count; + int32_t data[4] = {offset->u, offset->v, offset->w, 0}; + return vkd3d_dxbc_compiler_get_constant(compiler, + VKD3D_SHADER_COMPONENT_INT, component_count, (const uint32_t *)data); +} + +static void vkd3d_dxbc_compiler_emit_ld(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, coordinate_id, val_id; + SpvImageOperandsMask operands_mask = 0; + unsigned int image_operand_count = 0; + struct 
vkd3d_shader_image image; + uint32_t image_operands[2]; + DWORD coordinate_mask; + bool multisample; + + multisample = instruction->handler_idx == VKD3DSIH_LD2DMS; + + vkd3d_dxbc_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); + + type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + coordinate_mask = (1u << image.resource_type_info->coordinate_component_count) - 1; + coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], coordinate_mask); + if (image.resource_type_info->resource_type != VKD3D_SHADER_RESOURCE_BUFFER && !multisample) + { + operands_mask |= SpvImageOperandsLodMask; + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler, + &src[0], VKD3DSP_WRITEMASK_3); + } + if (vkd3d_shader_instruction_has_texel_offset(instruction)) + { + operands_mask |= SpvImageOperandsConstOffsetMask; + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_texel_offset(compiler, + instruction, image.resource_type_info); + } + if (multisample) + { + operands_mask |= SpvImageOperandsSampleMask; + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler, + &src[2], VKD3DSP_WRITEMASK_0); + } + assert(image_operand_count <= ARRAY_SIZE(image_operands)); + val_id = vkd3d_spirv_build_op_image_fetch(builder, type_id, + image.image_id, coordinate_id, operands_mask, image_operands, image_operand_count); + + vkd3d_dxbc_compiler_emit_store_dst_swizzled(compiler, + dst, val_id, image.sampled_type, src[1].swizzle); +} + +static void vkd3d_dxbc_compiler_emit_lod(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_shader_src_param *resource, *sampler; + uint32_t type_id, coordinate_id, val_id; + struct 
vkd3d_shader_image image; + + vkd3d_spirv_enable_capability(builder, SpvCapabilityImageQuery); + + resource = &src[1]; + sampler = &src[2]; + vkd3d_dxbc_compiler_prepare_image(compiler, &image, + &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); + coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); + val_id = vkd3d_spirv_build_op_image_query_lod(builder, + type_id, image.sampled_image_id, coordinate_id); + + vkd3d_dxbc_compiler_emit_store_dst_swizzled(compiler, + dst, val_id, image.sampled_type, resource->swizzle); +} + +static void vkd3d_dxbc_compiler_emit_sample(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_shader_src_param *resource, *sampler; + uint32_t sampled_type_id, coordinate_id, val_id; + SpvImageOperandsMask operands_mask = 0; + unsigned int image_operand_count = 0; + struct vkd3d_shader_image image; + uint32_t image_operands[3]; + DWORD coordinate_mask; + SpvOp op; + + resource = &src[1]; + sampler = &src[2]; + vkd3d_dxbc_compiler_prepare_image(compiler, &image, + &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); + + switch (instruction->handler_idx) + { + case VKD3DSIH_SAMPLE: + op = SpvOpImageSampleImplicitLod; + break; + case VKD3DSIH_SAMPLE_B: + op = SpvOpImageSampleImplicitLod; + operands_mask |= SpvImageOperandsBiasMask; + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler, + &src[3], VKD3DSP_WRITEMASK_0); + break; + case VKD3DSIH_SAMPLE_GRAD: + op = SpvOpImageSampleExplicitLod; + operands_mask |= SpvImageOperandsGradMask; + coordinate_mask = (1u << image.resource_type_info->offset_component_count) - 1; + 
image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler, + &src[3], coordinate_mask); + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler, + &src[4], coordinate_mask); + break; + case VKD3DSIH_SAMPLE_LOD: + op = SpvOpImageSampleExplicitLod; + operands_mask |= SpvImageOperandsLodMask; + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler, + &src[3], VKD3DSP_WRITEMASK_0); + break; + default: + ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + return; + } + + if (vkd3d_shader_instruction_has_texel_offset(instruction)) + { + operands_mask |= SpvImageOperandsConstOffsetMask; + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_texel_offset(compiler, + instruction, image.resource_type_info); + } + + sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); + assert(image_operand_count <= ARRAY_SIZE(image_operands)); + val_id = vkd3d_spirv_build_op_image_sample(builder, op, sampled_type_id, + image.sampled_image_id, coordinate_id, operands_mask, image_operands, image_operand_count); + + vkd3d_dxbc_compiler_emit_store_dst_swizzled(compiler, + dst, val_id, image.sampled_type, resource->swizzle); +} + +static void vkd3d_dxbc_compiler_emit_sample_c(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t sampled_type_id, coordinate_id, dref_id, val_id, type_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + SpvImageOperandsMask operands_mask = 0; + unsigned int image_operand_count = 0; + struct vkd3d_shader_image image; + uint32_t image_operands[2]; + SpvOp op; + + if (instruction->handler_idx == VKD3DSIH_SAMPLE_C_LZ) + { + op = 
SpvOpImageSampleDrefExplicitLod; + operands_mask |= SpvImageOperandsLodMask; + image_operands[image_operand_count++] + = vkd3d_dxbc_compiler_get_constant_float(compiler, 0.0f); + } + else + { + op = SpvOpImageSampleDrefImplicitLod; + } + + vkd3d_dxbc_compiler_prepare_image(compiler, + &image, &src[1].reg, &src[2].reg, VKD3D_IMAGE_FLAG_SAMPLED | VKD3D_IMAGE_FLAG_DEPTH); + + if (vkd3d_shader_instruction_has_texel_offset(instruction)) + { + operands_mask |= SpvImageOperandsConstOffsetMask; + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_texel_offset(compiler, + instruction, image.resource_type_info); + } + + sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, 1); + coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); + dref_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[3], VKD3DSP_WRITEMASK_0); + /* XXX: Nvidia is broken and expects that the D_ref is packed together with coordinates. */ + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + coordinate_id = vkd3d_spirv_build_op_composite_insert1(builder, + type_id, dref_id, coordinate_id, image.resource_type_info->coordinate_component_count); + val_id = vkd3d_spirv_build_op_image_sample_dref(builder, op, sampled_type_id, + image.sampled_image_id, coordinate_id, dref_id, operands_mask, + image_operands, image_operand_count); + + vkd3d_dxbc_compiler_emit_store_dst_scalar(compiler, + dst, val_id, image.sampled_type, src[1].swizzle); +} + +static void vkd3d_dxbc_compiler_emit_gather4(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_src_param *addr, *offset, *resource, *sampler; + uint32_t sampled_type_id, coordinate_id, component_id, dref_id, val_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = 
instruction->src; + unsigned int image_flags = VKD3D_IMAGE_FLAG_SAMPLED; + SpvImageOperandsMask operands_mask = 0; + unsigned int image_operand_count = 0; + struct vkd3d_shader_image image; + unsigned int component_idx; + uint32_t image_operands[1]; + DWORD coordinate_mask; + bool extended_offset; + + if (instruction->handler_idx == VKD3DSIH_GATHER4_C + || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C) + image_flags |= VKD3D_IMAGE_FLAG_DEPTH; + + extended_offset = instruction->handler_idx == VKD3DSIH_GATHER4_PO + || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C; + + addr = &src[0]; + offset = extended_offset ? &src[1] : NULL; + resource = &src[1 + extended_offset]; + sampler = &src[2 + extended_offset]; + + vkd3d_dxbc_compiler_prepare_image(compiler, &image, + &resource->reg, &sampler->reg, image_flags); + + if (offset) + { + vkd3d_spirv_enable_capability(builder, SpvCapabilityImageGatherExtended); + operands_mask |= SpvImageOperandsOffsetMask; + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler, + offset, (1u << image.resource_type_info->offset_component_count) - 1); + } + else if (vkd3d_shader_instruction_has_texel_offset(instruction)) + { + operands_mask |= SpvImageOperandsConstOffsetMask; + image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_texel_offset(compiler, + instruction, image.resource_type_info); + } + + sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + coordinate_mask = (1u << image.resource_type_info->coordinate_component_count) - 1; + coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, addr, coordinate_mask); + if (image_flags & VKD3D_IMAGE_FLAG_DEPTH) + { + dref_id = vkd3d_dxbc_compiler_emit_load_src(compiler, + &src[3 + extended_offset], VKD3DSP_WRITEMASK_0); + val_id = vkd3d_spirv_build_op_image_dref_gather(builder, sampled_type_id, + image.sampled_image_id, coordinate_id, dref_id, + operands_mask, image_operands, image_operand_count); + } + 
else + { + component_idx = vkd3d_swizzle_get_component(sampler->swizzle, 0); + /* Nvidia driver requires signed integer type. */ + component_id = vkd3d_dxbc_compiler_get_constant(compiler, + VKD3D_SHADER_COMPONENT_INT, 1, &component_idx); + val_id = vkd3d_spirv_build_op_image_gather(builder, sampled_type_id, + image.sampled_image_id, coordinate_id, component_id, + operands_mask, image_operands, image_operand_count); + } + + vkd3d_dxbc_compiler_emit_store_dst_swizzled(compiler, + dst, val_id, image.sampled_type, resource->swizzle); +} + +static uint32_t vkd3d_dxbc_compiler_emit_raw_structured_addressing( + struct vkd3d_dxbc_compiler *compiler, uint32_t type_id, unsigned int stride, + const struct vkd3d_shader_src_param *src0, DWORD src0_mask, + const struct vkd3d_shader_src_param *src1, DWORD src1_mask) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_src_param *offset; + uint32_t structure_id = 0, offset_id; + DWORD offset_write_mask; + + if (stride) + { + structure_id = vkd3d_dxbc_compiler_emit_load_src(compiler, src0, src0_mask); + structure_id = vkd3d_spirv_build_op_imul(builder, type_id, + structure_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, stride)); + } + offset = stride ? src1 : src0; + offset_write_mask = stride ? 
src1_mask : src0_mask; + + offset_id = vkd3d_dxbc_compiler_emit_load_src(compiler, offset, offset_write_mask); + offset_id = vkd3d_spirv_build_op_shift_right_logical(builder, type_id, + offset_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, 2)); + + if (structure_id) + return vkd3d_spirv_build_op_iadd(builder, type_id, structure_id, offset_id); + else + return offset_id; +} + +static void vkd3d_dxbc_compiler_emit_ld_raw_structured_srv_uav(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t coordinate_id, type_id, val_id, texel_type_id, ptr_type_id, ptr_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_shader_src_param *resource; + const struct vkd3d_symbol *resource_symbol; + uint32_t base_coordinate_id, component_idx; + uint32_t constituents[VKD3D_VEC4_SIZE]; + struct vkd3d_shader_image image; + uint32_t indices[2]; + unsigned int i, j; + SpvOp op; + + resource = &src[instruction->src_count - 1]; + resource_symbol = vkd3d_dxbc_compiler_find_resource(compiler, &resource->reg); + + if (resource->reg.type == VKD3DSPR_UAV + && vkd3d_dxbc_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) + { + texel_type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, texel_type_id); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + base_coordinate_id = vkd3d_dxbc_compiler_emit_raw_structured_addressing(compiler, + type_id, resource_symbol->info.resource.structure_stride, + &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + + assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) + 
continue; + + component_idx = vkd3d_swizzle_get_component(resource->swizzle, i); + coordinate_id = base_coordinate_id; + if (component_idx) + coordinate_id = vkd3d_spirv_build_op_iadd(builder, type_id, + coordinate_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, component_idx)); + indices[0] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + indices[1] = coordinate_id; + + ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, resource_symbol->id, indices, 2); + constituents[j++] = vkd3d_spirv_build_op_load(builder, texel_type_id, ptr_id, SpvMemoryAccessMaskNone); + } + } + else + { + if (resource->reg.type == VKD3DSPR_RESOURCE) + op = SpvOpImageFetch; + else + op = SpvOpImageRead; + + vkd3d_dxbc_compiler_prepare_image(compiler, &image, &resource->reg, NULL, VKD3D_IMAGE_FLAG_NONE); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + base_coordinate_id = vkd3d_dxbc_compiler_emit_raw_structured_addressing(compiler, + type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + + texel_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) + continue; + + component_idx = vkd3d_swizzle_get_component(resource->swizzle, i); + coordinate_id = base_coordinate_id; + if (component_idx) + coordinate_id = vkd3d_spirv_build_op_iadd(builder, type_id, + coordinate_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, component_idx)); + + val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + op, texel_type_id, image.image_id, coordinate_id); + constituents[j++] = vkd3d_spirv_build_op_composite_extract1(builder, + type_id, val_id, 0); + } + } + assert(dst->reg.data_type == VKD3D_DATA_UINT); + vkd3d_dxbc_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); +} + 
+static void vkd3d_dxbc_compiler_emit_ld_tgsm(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t coordinate_id, type_id, ptr_type_id, ptr_id; + const struct vkd3d_shader_src_param *resource; + struct vkd3d_shader_register_info reg_info; + uint32_t base_coordinate_id, component_idx; + uint32_t constituents[VKD3D_VEC4_SIZE]; + unsigned int i, j; + + resource = &src[instruction->src_count - 1]; + if (!vkd3d_dxbc_compiler_get_register_info(compiler, &resource->reg, ®_info)) + return; + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); + base_coordinate_id = vkd3d_dxbc_compiler_emit_raw_structured_addressing(compiler, + type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + + assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) + continue; + + component_idx = vkd3d_swizzle_get_component(resource->swizzle, i); + coordinate_id = base_coordinate_id; + if (component_idx) + coordinate_id = vkd3d_spirv_build_op_iadd(builder, type_id, + coordinate_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, component_idx)); + + ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); + constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); + } + assert(dst->reg.data_type == VKD3D_DATA_UINT); + vkd3d_dxbc_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); +} + +static void vkd3d_dxbc_compiler_emit_ld_raw_structured(struct vkd3d_dxbc_compiler *compiler, + const 
struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_shader_register_type reg_type = instruction->src[instruction->src_count - 1].reg.type; + switch (reg_type) + { + case VKD3DSPR_RESOURCE: + case VKD3DSPR_UAV: + vkd3d_dxbc_compiler_emit_ld_raw_structured_srv_uav(compiler, instruction); + break; + case VKD3DSPR_GROUPSHAREDMEM: + vkd3d_dxbc_compiler_emit_ld_tgsm(compiler, instruction); + break; + default: + ERR("Unexpected register type %#x.\n", reg_type); + } +} + +static void vkd3d_dxbc_compiler_emit_store_uav_raw_structured(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t coordinate_id, type_id, val_id, data_id, ptr_type_id, ptr_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_symbol *resource_symbol; + uint32_t base_coordinate_id, component_idx; + const struct vkd3d_shader_src_param *data; + struct vkd3d_shader_image image; + unsigned int component_count; + uint32_t indices[2]; + + resource_symbol = vkd3d_dxbc_compiler_find_resource(compiler, &dst->reg); + + if (vkd3d_dxbc_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) + { + type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + base_coordinate_id = vkd3d_dxbc_compiler_emit_raw_structured_addressing(compiler, + type_id, resource_symbol->info.resource.structure_stride, + &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + + data = &src[instruction->src_count - 1]; + assert(data->reg.data_type == VKD3D_DATA_UINT); + val_id = vkd3d_dxbc_compiler_emit_load_src(compiler, data, dst->write_mask); + + component_count = 
vkd3d_write_mask_component_count(dst->write_mask); + for (component_idx = 0; component_idx < component_count; ++component_idx) + { + data_id = component_count > 1 ? + vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx) : val_id; + + coordinate_id = base_coordinate_id; + if (component_idx) + coordinate_id = vkd3d_spirv_build_op_iadd(builder, type_id, + coordinate_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, component_idx)); + indices[0] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + indices[1] = coordinate_id; + + ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, resource_symbol->id, indices, 2); + vkd3d_spirv_build_op_store(builder, ptr_id, data_id, SpvMemoryAccessMaskNone); + } + } + else + { + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + vkd3d_dxbc_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); + assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !image.structure_stride); + base_coordinate_id = vkd3d_dxbc_compiler_emit_raw_structured_addressing(compiler, + type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + + data = &src[instruction->src_count - 1]; + assert(data->reg.data_type == VKD3D_DATA_UINT); + val_id = vkd3d_dxbc_compiler_emit_load_src(compiler, data, dst->write_mask); + + component_count = vkd3d_write_mask_component_count(dst->write_mask); + for (component_idx = 0; component_idx < component_count; ++component_idx) + { + /* Mesa Vulkan drivers require the texel parameter to be a vector. 
*/ + data_id = vkd3d_dxbc_compiler_emit_construct_vector(compiler, VKD3D_SHADER_COMPONENT_UINT, + VKD3D_VEC4_SIZE, val_id, component_idx, component_count); + + coordinate_id = base_coordinate_id; + if (component_idx) + coordinate_id = vkd3d_spirv_build_op_iadd(builder, type_id, + coordinate_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, component_idx)); + + vkd3d_spirv_build_op_image_write(builder, image.image_id, coordinate_id, + data_id, SpvImageOperandsMaskNone, NULL, 0); + } + } + +} + +static void vkd3d_dxbc_compiler_emit_store_tgsm(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t coordinate_id, type_id, val_id, ptr_type_id, ptr_id, data_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t base_coordinate_id, component_idx; + const struct vkd3d_shader_src_param *data; + struct vkd3d_shader_register_info reg_info; + unsigned int component_count; + + if (!vkd3d_dxbc_compiler_get_register_info(compiler, &dst->reg, ®_info)) + return; + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); + assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !reg_info.structure_stride); + base_coordinate_id = vkd3d_dxbc_compiler_emit_raw_structured_addressing(compiler, + type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + + data = &src[instruction->src_count - 1]; + assert(data->reg.data_type == VKD3D_DATA_UINT); + val_id = vkd3d_dxbc_compiler_emit_load_src(compiler, data, dst->write_mask); + + component_count = vkd3d_write_mask_component_count(dst->write_mask); + for (component_idx = 0; component_idx < component_count; ++component_idx) + { + data_id = component_count > 1 ? 
+ vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx) : val_id; + + coordinate_id = base_coordinate_id; + if (component_idx) + coordinate_id = vkd3d_spirv_build_op_iadd(builder, type_id, + coordinate_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, component_idx)); + + ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); + vkd3d_spirv_build_op_store(builder, ptr_id, data_id, SpvMemoryAccessMaskNone); + } +} + +static void vkd3d_dxbc_compiler_emit_store_raw_structured(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_shader_register_type reg_type = instruction->dst[0].reg.type; + switch (reg_type) + { + case VKD3DSPR_UAV: + vkd3d_dxbc_compiler_emit_store_uav_raw_structured(compiler, instruction); + break; + case VKD3DSPR_GROUPSHAREDMEM: + vkd3d_dxbc_compiler_emit_store_tgsm(compiler, instruction); + break; + default: + ERR("Unexpected register type %#x.\n", reg_type); + } +} + +static void vkd3d_dxbc_compiler_emit_ld_uav_typed(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t coordinate_id, type_id, val_id, ptr_type_id, ptr_id; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_symbol *resource_symbol; + struct vkd3d_shader_image image; + DWORD coordinate_mask; + uint32_t indices[2]; + + resource_symbol = vkd3d_dxbc_compiler_find_resource(compiler, &src[1].reg); + + if (vkd3d_dxbc_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) + { + type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); + coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], 
VKD3DSP_WRITEMASK_0); + indices[0] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + indices[1] = coordinate_id; + + ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, resource_symbol->id, indices, 2); + val_id = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); + + vkd3d_dxbc_compiler_emit_store_dst_swizzled(compiler, dst, val_id, + resource_symbol->info.resource.sampled_type, src[1].swizzle); + } + else + { + vkd3d_dxbc_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); + type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + coordinate_mask = (1u << image.resource_type_info->coordinate_component_count) - 1; + coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], coordinate_mask); + + val_id = vkd3d_spirv_build_op_image_read(builder, type_id, + image.image_id, coordinate_id, SpvImageOperandsMaskNone, NULL, 0); + + vkd3d_dxbc_compiler_emit_store_dst_swizzled(compiler, + dst, val_id, image.sampled_type, src[1].swizzle); + } +} + +static void vkd3d_dxbc_compiler_emit_store_uav_typed(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + uint32_t coordinate_id, texel_id, type_id, val_id, ptr_type_id, ptr_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_symbol *resource_symbol; + struct vkd3d_shader_image image; + DWORD coordinate_mask; + uint32_t indices[2]; + + resource_symbol = vkd3d_dxbc_compiler_find_resource(compiler, &dst->reg); + + if (vkd3d_dxbc_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) + { + type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); + coordinate_id = 
vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_0); + indices[0] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + indices[1] = coordinate_id; + + val_id = vkd3d_dxbc_compiler_emit_load_src_with_type(compiler, &src[1], + VKD3DSP_WRITEMASK_0, resource_symbol->info.resource.sampled_type); + ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, resource_symbol->id, indices, 2); + vkd3d_spirv_build_op_store(builder, ptr_id, val_id, SpvMemoryAccessMaskNone); + } + else + { + vkd3d_spirv_enable_capability(builder, SpvCapabilityStorageImageWriteWithoutFormat); + + vkd3d_dxbc_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); + coordinate_mask = (1u << image.resource_type_info->coordinate_component_count) - 1; + coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], coordinate_mask); + texel_id = vkd3d_dxbc_compiler_emit_load_src_with_type(compiler, &src[1], dst->write_mask, image.sampled_type); + + vkd3d_spirv_build_op_image_write(builder, image.image_id, coordinate_id, texel_id, + SpvImageOperandsMaskNone, NULL, 0); + } +} + +static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + unsigned int memory_semantics = SpvMemorySemanticsMaskNone; + uint32_t ptr_type_id, type_id, counter_id, result_id; + uint32_t coordinate_id, sample_id, pointer_id; + const struct vkd3d_symbol *resource_symbol; + uint32_t operands[3]; + SpvOp op; + + op = instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC + ? 
SpvOpAtomicIIncrement : SpvOpAtomicIDecrement; + + resource_symbol = vkd3d_dxbc_compiler_find_resource(compiler, &src->reg); + counter_id = resource_symbol->info.resource.uav_counter_id; + assert(counter_id); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + + if (resource_symbol->info.resource.uav_counter_array) + { + const struct vkd3d_symbol_descriptor_array_data *array_data; + uint32_t index_id; + + index_id = vkd3d_dxbc_compiler_get_descriptor_index(compiler, &src->reg, + resource_symbol->info.resource.uav_counter_array, + resource_symbol->info.resource.uav_counter_base_idx, + resource_symbol->info.resource.resource_type_info->resource_type); + + array_data = &resource_symbol->info.resource.uav_counter_array->info.descriptor_array; + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, array_data->storage_class, + array_data->contained_type_id); + + counter_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, counter_id, &index_id, 1); + } + + if (vkd3d_dxbc_compiler_is_opengl_target(compiler)) + { + pointer_id = counter_id; + memory_semantics |= SpvMemorySemanticsAtomicCounterMemoryMask; + } + else if (compiler->ssbo_uavs) + { + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); + coordinate_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + operands[0] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + operands[1] = coordinate_id; + pointer_id = vkd3d_spirv_build_op_access_chain(builder, + ptr_type_id, counter_id, operands, 2); + } + else + { + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassImage, type_id); + coordinate_id = sample_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + pointer_id = vkd3d_spirv_build_op_image_texel_pointer(builder, + ptr_type_id, counter_id, coordinate_id, sample_id); + } + + operands[0] = pointer_id; + operands[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvScopeDevice); + operands[2] = 
vkd3d_dxbc_compiler_get_constant_uint(compiler, memory_semantics); + result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, + op, type_id, operands, ARRAY_SIZE(operands)); + if (op == SpvOpAtomicIDecrement) + { + /* SpvOpAtomicIDecrement returns the original value. */ + result_id = vkd3d_spirv_build_op_isub(builder, type_id, result_id, + vkd3d_dxbc_compiler_get_constant_uint(compiler, 1)); + } + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, result_id); +} + +static SpvOp vkd3d_dxbc_compiler_map_atomic_instruction(const struct vkd3d_shader_instruction *instruction) +{ + static const struct + { + enum vkd3d_shader_opcode handler_idx; + SpvOp spirv_op; + } + atomic_ops[] = + { + {VKD3DSIH_ATOMIC_AND, SpvOpAtomicAnd}, + {VKD3DSIH_ATOMIC_CMP_STORE, SpvOpAtomicCompareExchange}, + {VKD3DSIH_ATOMIC_IADD, SpvOpAtomicIAdd}, + {VKD3DSIH_ATOMIC_IMAX, SpvOpAtomicSMax}, + {VKD3DSIH_ATOMIC_IMIN, SpvOpAtomicSMin}, + {VKD3DSIH_ATOMIC_OR, SpvOpAtomicOr}, + {VKD3DSIH_ATOMIC_UMAX, SpvOpAtomicUMax}, + {VKD3DSIH_ATOMIC_UMIN, SpvOpAtomicUMin}, + {VKD3DSIH_ATOMIC_XOR, SpvOpAtomicXor}, + {VKD3DSIH_IMM_ATOMIC_AND, SpvOpAtomicAnd}, + {VKD3DSIH_IMM_ATOMIC_CMP_EXCH, SpvOpAtomicCompareExchange}, + {VKD3DSIH_IMM_ATOMIC_EXCH, SpvOpAtomicExchange}, + {VKD3DSIH_IMM_ATOMIC_IADD, SpvOpAtomicIAdd}, + {VKD3DSIH_IMM_ATOMIC_IMAX, SpvOpAtomicSMax}, + {VKD3DSIH_IMM_ATOMIC_IMIN, SpvOpAtomicSMin}, + {VKD3DSIH_IMM_ATOMIC_OR, SpvOpAtomicOr}, + {VKD3DSIH_IMM_ATOMIC_UMAX, SpvOpAtomicUMax}, + {VKD3DSIH_IMM_ATOMIC_UMIN, SpvOpAtomicUMin}, + {VKD3DSIH_IMM_ATOMIC_XOR, SpvOpAtomicXor}, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(atomic_ops); ++i) + { + if (atomic_ops[i].handler_idx == instruction->handler_idx) + return atomic_ops[i].spirv_op; + } + + return SpvOpMax; +} + +static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode handler_idx) +{ + return VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR; +} + +static void 
vkd3d_dxbc_compiler_emit_atomic_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_symbol *resource_symbol = NULL; + uint32_t ptr_type_id, type_id, val_id, result_id; + enum vkd3d_shader_component_type component_type; + const struct vkd3d_shader_dst_param *resource; + uint32_t coordinate_id, sample_id, pointer_id; + struct vkd3d_shader_register_info reg_info; + struct vkd3d_shader_image image; + unsigned int structure_stride; + DWORD coordinate_mask; + uint32_t operands[6]; + unsigned int i = 0; + SpvScope scope; + bool raw; + SpvOp op; + + resource = is_imm_atomic_instruction(instruction->handler_idx) ? &dst[1] : &dst[0]; + + op = vkd3d_dxbc_compiler_map_atomic_instruction(instruction); + if (op == SpvOpMax) + { + ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + return; + } + + if (resource->reg.type == VKD3DSPR_GROUPSHAREDMEM) + { + scope = SpvScopeWorkgroup; + coordinate_mask = 1u; + if (!vkd3d_dxbc_compiler_get_register_info(compiler, &resource->reg, ®_info)) + return; + structure_stride = reg_info.structure_stride; + raw = !structure_stride; + } + else + { + scope = SpvScopeDevice; + resource_symbol = vkd3d_dxbc_compiler_find_resource(compiler, &resource->reg); + + if (vkd3d_dxbc_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) + { + coordinate_mask = VKD3DSP_WRITEMASK_0; + structure_stride = resource_symbol->info.resource.structure_stride; + raw = resource_symbol->info.resource.raw; + } + else + { + vkd3d_dxbc_compiler_prepare_image(compiler, &image, &resource->reg, NULL, VKD3D_IMAGE_FLAG_NO_LOAD); + coordinate_mask = (1u << image.resource_type_info->coordinate_component_count) - 1; + structure_stride = image.structure_stride; + raw = image.raw; + } + } + 
+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + if (structure_stride || raw) + { + assert(!raw != !structure_stride); + coordinate_id = vkd3d_dxbc_compiler_emit_raw_structured_addressing(compiler, + type_id, structure_stride, &src[0], VKD3DSP_WRITEMASK_0, + &src[0], VKD3DSP_WRITEMASK_1); + } + else + { + assert(resource->reg.type != VKD3DSPR_GROUPSHAREDMEM); + coordinate_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], coordinate_mask); + } + + if (resource->reg.type == VKD3DSPR_GROUPSHAREDMEM) + { + component_type = VKD3D_SHADER_COMPONENT_UINT; + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); + pointer_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); + } + else + { + if (vkd3d_dxbc_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) + { + component_type = resource_symbol->info.resource.sampled_type; + type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); + operands[0] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + operands[1] = coordinate_id; + pointer_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, resource_symbol->id, operands, 2); + } + else + { + component_type = image.sampled_type; + type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassImage, type_id); + sample_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + pointer_id = vkd3d_spirv_build_op_image_texel_pointer(builder, + ptr_type_id, image.id, coordinate_id, sample_id); + } + } + + val_id = vkd3d_dxbc_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type); + + operands[i++] = pointer_id; + operands[i++] = vkd3d_dxbc_compiler_get_constant_uint(compiler, scope); + operands[i++] = 
vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); + if (instruction->src_count >= 3) + { + operands[i++] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); + operands[i++] = vkd3d_dxbc_compiler_emit_load_src_with_type(compiler, &src[2], VKD3DSP_WRITEMASK_0, component_type); + } + operands[i++] = val_id; + result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, + op, type_id, operands, i); + + if (is_imm_atomic_instruction(instruction->handler_idx)) + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, result_id); +} + +static void vkd3d_dxbc_compiler_emit_bufinfo(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_symbol *resource_symbol; + uint32_t type_id, val_id, stride_id; + struct vkd3d_shader_image image; + uint32_t constituents[2]; + unsigned int write_mask; + + if (compiler->ssbo_uavs && src->reg.type == VKD3DSPR_UAV) + { + resource_symbol = vkd3d_dxbc_compiler_find_resource(compiler, &src->reg); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + val_id = vkd3d_spirv_build_op_array_length(builder, type_id, resource_symbol->id, 0); + write_mask = VKD3DSP_WRITEMASK_0; + } + else + { + vkd3d_spirv_enable_capability(builder, SpvCapabilityImageQuery); + + vkd3d_dxbc_compiler_prepare_image(compiler, &image, &src->reg, NULL, VKD3D_IMAGE_FLAG_NONE); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + val_id = vkd3d_spirv_build_op_image_query_size(builder, type_id, image.image_id); + write_mask = VKD3DSP_WRITEMASK_0; + } + + if (image.structure_stride) + { + stride_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, image.structure_stride); + constituents[0] = 
vkd3d_spirv_build_op_udiv(builder, type_id, val_id, stride_id); + constituents[1] = stride_id; + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, ARRAY_SIZE(constituents)); + val_id = vkd3d_spirv_build_op_composite_construct(builder, + type_id, constituents, ARRAY_SIZE(constituents)); + write_mask |= VKD3DSP_WRITEMASK_1; + } + else if (image.raw) + { + val_id = vkd3d_spirv_build_op_shift_left_logical(builder, type_id, + val_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, 2)); + } + + val_id = vkd3d_dxbc_compiler_emit_swizzle(compiler, val_id, write_mask, + VKD3D_SHADER_COMPONENT_UINT, src->swizzle, dst->write_mask); + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void vkd3d_dxbc_compiler_emit_resinfo(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, lod_id, val_id, miplevel_count_id; + uint32_t constituents[VKD3D_VEC4_SIZE]; + unsigned int i, size_component_count; + struct vkd3d_shader_image image; + bool supports_mipmaps; + + vkd3d_spirv_enable_capability(builder, SpvCapabilityImageQuery); + + vkd3d_dxbc_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); + size_component_count = image.resource_type_info->coordinate_component_count; + if (image.resource_type_info->dim == SpvDimCube) + --size_component_count; + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, size_component_count); + + supports_mipmaps = src[1].reg.type != VKD3DSPR_UAV && !image.resource_type_info->ms; + if (supports_mipmaps) + { + lod_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_0); + val_id = vkd3d_spirv_build_op_image_query_size_lod(builder, type_id, image.image_id, lod_id); + type_id = 
vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + miplevel_count_id = vkd3d_spirv_build_op_image_query_levels(builder, type_id, image.image_id); + } + else + { + val_id = vkd3d_spirv_build_op_image_query_size(builder, type_id, image.image_id); + /* For UAVs the returned miplevel count is always 1. */ + miplevel_count_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, 1); + } + + constituents[0] = val_id; + for (i = 0; i < 3 - size_component_count; ++i) + constituents[i + 1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + constituents[i + 1] = miplevel_count_id; + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); + val_id = vkd3d_spirv_build_op_composite_construct(builder, + type_id, constituents, i + 2); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + if (instruction->flags == VKD3DSI_RESINFO_UINT) + { + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + } + else + { + if (instruction->flags) + FIXME("Unhandled flags %#x.\n", instruction->flags); + val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); + } + val_id = vkd3d_dxbc_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, + VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +static uint32_t vkd3d_dxbc_compiler_emit_query_sample_count(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_src_param *src) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_shader_image image; + uint32_t type_id, val_id; + + if (src->reg.type == VKD3DSPR_RASTERIZER) + { + val_id = vkd3d_dxbc_compiler_emit_uint_shader_parameter(compiler, + VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT); + } + else + { + vkd3d_spirv_enable_capability(builder, SpvCapabilityImageQuery); + + vkd3d_dxbc_compiler_prepare_image(compiler, &image, &src->reg, NULL, 
VKD3D_IMAGE_FLAG_NONE); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + val_id = vkd3d_spirv_build_op_image_query_samples(builder, type_id, image.image_id); + } + + return val_id; +} + +static void vkd3d_dxbc_compiler_emit_sample_info(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t constituents[VKD3D_VEC4_SIZE]; + uint32_t type_id, val_id; + unsigned int i; + + val_id = vkd3d_dxbc_compiler_emit_query_sample_count(compiler, src); + + constituents[0] = val_id; + for (i = 1; i < VKD3D_VEC4_SIZE; ++i) + constituents[i] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); + val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, constituents, VKD3D_VEC4_SIZE); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + if (instruction->flags == VKD3DSI_SAMPLE_INFO_UINT) + { + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + } + else + { + if (instruction->flags) + FIXME("Unhandled flags %#x.\n", instruction->flags); + val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); + } + + val_id = vkd3d_dxbc_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, + VKD3D_SHADER_COMPONENT_FLOAT, src->swizzle, dst->write_mask); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +/* XXX: This is correct only when standard sample positions are used. */ +static void vkd3d_dxbc_compiler_emit_sample_position(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + /* Standard sample locations from the Vulkan spec. 
*/ + static const float standard_sample_positions[][2] = + { + /* 1 sample */ + { 0.0 / 16.0, 0.0 / 16.0}, + /* 2 samples */ + { 4.0 / 16.0, 4.0 / 16.0}, + {-4.0 / 16.0, -4.0 / 16.0}, + /* 4 samples */ + {-2.0 / 16.0, -6.0 / 16.0}, + { 6.0 / 16.0, -2.0 / 16.0}, + {-6.0 / 16.0, 2.0 / 16.0}, + { 2.0 / 16.0, 6.0 / 16.0}, + /* 8 samples */ + { 1.0 / 16.0, -3.0 / 16.0}, + {-1.0 / 16.0, 3.0 / 16.0}, + { 5.0 / 16.0, 1.0 / 16.0}, + {-3.0 / 16.0, -5.0 / 16.0}, + {-5.0 / 16.0, 5.0 / 16.0}, + {-7.0 / 16.0, -1.0 / 16.0}, + { 3.0 / 16.0, 7.0 / 16.0}, + { 7.0 / 16.0, -7.0 / 16.0}, + /* 16 samples */ + { 1.0 / 16.0, 1.0 / 16.0}, + {-1.0 / 16.0, -3.0 / 16.0}, + {-3.0 / 16.0, 2.0 / 16.0}, + { 4.0 / 16.0, -1.0 / 16.0}, + {-5.0 / 16.0, -2.0 / 16.0}, + { 2.0 / 16.0, 5.0 / 16.0}, + { 5.0 / 16.0, 3.0 / 16.0}, + { 3.0 / 16.0, -5.0 / 16.0}, + {-2.0 / 16.0, 6.0 / 16.0}, + { 0.0 / 16.0, -7.0 / 16.0}, + {-4.0 / 16.0, -6.0 / 16.0}, + {-6.0 / 16.0, 4.0 / 16.0}, + {-8.0 / 16.0, 0.0 / 16.0}, + { 7.0 / 16.0, -4.0 / 16.0}, + { 6.0 / 16.0, 7.0 / 16.0}, + {-7.0 / 16.0, -8.0 / 16.0}, + }; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t constituents[ARRAY_SIZE(standard_sample_positions)]; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + uint32_t array_type_id, length_id, index_id, id; + uint32_t sample_count_id, sample_index_id; + uint32_t type_id, bool_id, ptr_type_id; + unsigned int i; + + sample_count_id = vkd3d_dxbc_compiler_emit_query_sample_count(compiler, &instruction->src[0]); + sample_index_id = vkd3d_dxbc_compiler_emit_load_src(compiler, &instruction->src[1], VKD3DSP_WRITEMASK_0); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + index_id = vkd3d_spirv_build_op_iadd(builder, type_id, sample_count_id, sample_index_id); + index_id = vkd3d_spirv_build_op_isub(builder, + type_id, index_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, 1)); + + /* Validate sample index. 
*/ + bool_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, 1); + id = vkd3d_spirv_build_op_logical_and(builder, bool_id, + vkd3d_spirv_build_op_uless_than(builder, bool_id, sample_index_id, sample_count_id), + vkd3d_spirv_build_op_uless_than_equal(builder, + bool_id, sample_index_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, 16))); + index_id = vkd3d_spirv_build_op_select(builder, type_id, + id, index_id, vkd3d_dxbc_compiler_get_constant_uint(compiler, 0)); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); + if (!(id = compiler->sample_positions_id)) + { + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, ARRAY_SIZE(standard_sample_positions)); + array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + + for (i = 0; i < ARRAY_SIZE(standard_sample_positions); ++ i) + { + constituents[i] = vkd3d_dxbc_compiler_get_constant(compiler, + VKD3D_SHADER_COMPONENT_FLOAT, 2, (const uint32_t *)standard_sample_positions[i]); + } + + id = vkd3d_spirv_build_op_constant_composite(builder, array_type_id, constituents, ARRAY_SIZE(constituents)); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, array_type_id); + id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, ptr_type_id, SpvStorageClassPrivate, id); + vkd3d_spirv_build_op_name(builder, id, "sample_pos"); + compiler->sample_positions_id = id; + } + + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); + id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, id, index_id); + id = vkd3d_spirv_build_op_load(builder, type_id, id, SpvMemoryAccessMaskNone); + + id = vkd3d_dxbc_compiler_emit_swizzle(compiler, id, VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1, + VKD3D_SHADER_COMPONENT_FLOAT, instruction->src[0].swizzle, dst->write_mask); + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, id); +} + +static void vkd3d_dxbc_compiler_emit_eval_attrib(struct 
vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_shader_register *input = &src[0].reg; + uint32_t instr_set_id, type_id, val_id, src_ids[2]; + struct vkd3d_shader_register_info register_info; + unsigned int src_count = 0; + enum GLSLstd450 op; + + if (!vkd3d_dxbc_compiler_get_register_info(compiler, input, ®ister_info)) + return; + + if (register_info.storage_class != SpvStorageClassInput) + { + FIXME("Not supported for storage class %#x.\n", register_info.storage_class); + return; + } + + vkd3d_spirv_enable_capability(builder, SpvCapabilityInterpolationFunction); + + src_ids[src_count++] = register_info.id; + + if (instruction->handler_idx == VKD3DSIH_EVAL_CENTROID) + { + op = GLSLstd450InterpolateAtCentroid; + } + else + { + assert(instruction->handler_idx == VKD3DSIH_EVAL_SAMPLE_INDEX); + op = GLSLstd450InterpolateAtSample; + src_ids[src_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); + } + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, + vkd3d_write_mask_component_count(register_info.write_mask)); + + instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); + val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, instr_set_id, op, src_ids, src_count); + + val_id = vkd3d_dxbc_compiler_emit_swizzle(compiler, val_id, register_info.write_mask, + VKD3D_SHADER_COMPONENT_FLOAT, src[0].swizzle, dst->write_mask); + + vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, val_id); +} + +/* From the Vulkan spec: + * + * "Scope for execution must be limited to: * Workgroup * Subgroup" + * + * "Scope for memory must be limited to: * Device * Workgroup * Invocation" + */ +static void vkd3d_dxbc_compiler_emit_sync(struct vkd3d_dxbc_compiler *compiler, + const 
struct vkd3d_shader_instruction *instruction) +{ + unsigned int memory_semantics = SpvMemorySemanticsAcquireReleaseMask; + unsigned int flags = instruction->flags; + SpvScope execution_scope = SpvScopeMax; + SpvScope memory_scope = SpvScopeDevice; + + if (flags & VKD3DSSF_GROUP_SHARED_MEMORY) + { + memory_scope = SpvScopeWorkgroup; + memory_semantics |= SpvMemorySemanticsWorkgroupMemoryMask; + flags &= ~VKD3DSSF_GROUP_SHARED_MEMORY; + } + + if (flags & VKD3DSSF_THREAD_GROUP) + { + execution_scope = SpvScopeWorkgroup; + flags &= ~VKD3DSSF_THREAD_GROUP; + } + + if (flags & VKD3DSSF_GLOBAL_UAV) + { + memory_scope = SpvScopeDevice; + memory_semantics |= SpvMemorySemanticsImageMemoryMask; + flags &= ~VKD3DSSF_GLOBAL_UAV; + } + + if (flags) + { + FIXME("Unhandled sync flags %#x.\n", flags); + memory_scope = SpvScopeDevice; + execution_scope = SpvScopeWorkgroup; + memory_semantics |= SpvMemorySemanticsUniformMemoryMask + | SpvMemorySemanticsSubgroupMemoryMask + | SpvMemorySemanticsWorkgroupMemoryMask + | SpvMemorySemanticsCrossWorkgroupMemoryMask + | SpvMemorySemanticsAtomicCounterMemoryMask + | SpvMemorySemanticsImageMemoryMask; + } + + vkd3d_dxbc_compiler_emit_barrier(compiler, execution_scope, memory_scope, memory_semantics); +} + +static void vkd3d_dxbc_compiler_emit_emit_stream(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int stream_idx; + + if (instruction->handler_idx == VKD3DSIH_EMIT_STREAM) + stream_idx = instruction->src[0].reg.idx[0].offset; + else + stream_idx = 0; + + if (stream_idx) + { + FIXME("Multiple streams are not supported yet.\n"); + return; + } + + vkd3d_dxbc_compiler_emit_shader_epilogue_invocation(compiler); + vkd3d_spirv_build_op_emit_vertex(builder); +} + +static void vkd3d_dxbc_compiler_emit_cut_stream(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct 
vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int stream_idx; + + if (instruction->handler_idx == VKD3DSIH_CUT_STREAM) + stream_idx = instruction->src[0].reg.idx[0].offset; + else + stream_idx = 0; + + if (stream_idx) + { + FIXME("Multiple streams are not supported yet.\n"); + return; + } + + vkd3d_spirv_build_op_end_primitive(builder); +} + +static void vkd3d_dxbc_compiler_emit_hull_shader_inputs(struct vkd3d_dxbc_compiler *compiler) +{ + const struct vkd3d_shader_signature *signature = compiler->input_signature; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, length_id, vicp_id, vicp_type_id; + unsigned int register_count, register_idx, i; + struct vkd3d_shader_register r; + struct vkd3d_symbol symbol; + struct rb_entry *entry; + + for (i = 0, register_count = 0; i < signature->element_count; ++i) + { + register_count = max(register_count, signature->elements[i].register_index + 1); + } + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, compiler->input_control_point_count); + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + length_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, register_count); + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + vicp_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); + + vicp_id = vkd3d_spirv_build_op_variable(builder, + &builder->global_stream, vicp_type_id, SpvStorageClassPrivate, 0); + vkd3d_spirv_build_op_name(builder, vicp_id, "vicp"); + + memset(&r, 0, sizeof(r)); + r.type = VKD3DSPR_INPUT; + r.idx[0].offset = 0; + r.idx[1].offset = ~0u; + vkd3d_symbol_make_register(&symbol, &r); + + for (i = 0; i < signature->element_count; ++i) + { + register_idx = signature->elements[i].register_index; + + symbol.key.reg.idx = register_idx; + if ((entry = rb_get(&compiler->symbol_table, &symbol))) + { + 
struct vkd3d_symbol *s = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + s->info.reg.dcl_mask |= signature->elements[i].mask; + continue; + } + + vkd3d_symbol_set_register_info(&symbol, vicp_id, SpvStorageClassPrivate, + VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + symbol.info.reg.dcl_mask = signature->elements[i].mask; + symbol.info.reg.is_aggregate = true; + vkd3d_dxbc_compiler_put_symbol(compiler, &symbol); + } +} + +/* This function is called after declarations are processed. */ +static void vkd3d_dxbc_compiler_emit_main_prolog(struct vkd3d_dxbc_compiler *compiler) +{ + vkd3d_dxbc_compiler_emit_push_constant_buffers(compiler); + + if (compiler->xfb_info && compiler->xfb_info->element_count + && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) + vkd3d_dxbc_compiler_emit_point_size(compiler); + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL) + vkd3d_dxbc_compiler_emit_hull_shader_inputs(compiler); +} + +static bool is_dcl_instruction(enum vkd3d_shader_opcode handler_idx) +{ + return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) + || handler_idx == VKD3DSIH_HS_DECLS; +} + +int vkd3d_dxbc_compiler_handle_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + int ret = VKD3D_OK; + + if (!is_dcl_instruction(instruction->handler_idx) && !compiler->after_declarations_section) + { + compiler->after_declarations_section = true; + vkd3d_dxbc_compiler_emit_main_prolog(compiler); + } + + switch (instruction->handler_idx) + { + case VKD3DSIH_DCL_GLOBAL_FLAGS: + vkd3d_dxbc_compiler_emit_dcl_global_flags(compiler, instruction); + break; + case VKD3DSIH_DCL_TEMPS: + vkd3d_dxbc_compiler_emit_dcl_temps(compiler, instruction); + break; + case VKD3DSIH_DCL_INDEXABLE_TEMP: + vkd3d_dxbc_compiler_emit_dcl_indexable_temp(compiler, instruction); + break; + case VKD3DSIH_DCL_CONSTANT_BUFFER: + vkd3d_dxbc_compiler_emit_dcl_constant_buffer(compiler, instruction); + break; + case 
VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: + vkd3d_dxbc_compiler_emit_dcl_immediate_constant_buffer(compiler, instruction); + break; + case VKD3DSIH_DCL_SAMPLER: + vkd3d_dxbc_compiler_emit_dcl_sampler(compiler, instruction); + break; + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_UAV_TYPED: + vkd3d_dxbc_compiler_emit_dcl_resource(compiler, instruction); + break; + case VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_UAV_RAW: + vkd3d_dxbc_compiler_emit_dcl_resource_raw(compiler, instruction); + break; + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + case VKD3DSIH_DCL_UAV_STRUCTURED: + vkd3d_dxbc_compiler_emit_dcl_resource_structured(compiler, instruction); + break; + case VKD3DSIH_DCL_TGSM_RAW: + vkd3d_dxbc_compiler_emit_dcl_tgsm_raw(compiler, instruction); + break; + case VKD3DSIH_DCL_TGSM_STRUCTURED: + vkd3d_dxbc_compiler_emit_dcl_tgsm_structured(compiler, instruction); + break; + case VKD3DSIH_DCL_INPUT: + vkd3d_dxbc_compiler_emit_dcl_input(compiler, instruction); + break; + case VKD3DSIH_DCL_INPUT_PS: + vkd3d_dxbc_compiler_emit_dcl_input_ps(compiler, instruction); + break; + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + vkd3d_dxbc_compiler_emit_dcl_input_ps_sysval(compiler, instruction); + break; + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + vkd3d_dxbc_compiler_emit_dcl_input_sysval(compiler, instruction); + break; + case VKD3DSIH_DCL_OUTPUT: + vkd3d_dxbc_compiler_emit_dcl_output(compiler, instruction); + break; + case VKD3DSIH_DCL_OUTPUT_SIV: + vkd3d_dxbc_compiler_emit_dcl_output_siv(compiler, instruction); + break; + case VKD3DSIH_DCL_INDEX_RANGE: + vkd3d_dxbc_compiler_emit_dcl_index_range(compiler, instruction); + break; + case VKD3DSIH_DCL_STREAM: + vkd3d_dxbc_compiler_emit_dcl_stream(compiler, instruction); + break; + case VKD3DSIH_DCL_VERTICES_OUT: + vkd3d_dxbc_compiler_emit_output_vertex_count(compiler, instruction); + break; + case VKD3DSIH_DCL_INPUT_PRIMITIVE: + vkd3d_dxbc_compiler_emit_dcl_input_primitive(compiler, instruction); 
+ break; + case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: + vkd3d_dxbc_compiler_emit_dcl_output_topology(compiler, instruction); + break; + case VKD3DSIH_DCL_GS_INSTANCES: + vkd3d_dxbc_compiler_emit_dcl_gs_instances(compiler, instruction); + break; + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + compiler->input_control_point_count = instruction->declaration.count; + break; + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + compiler->output_control_point_count = instruction->declaration.count; + vkd3d_dxbc_compiler_emit_output_vertex_count(compiler, instruction); + break; + case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: + vkd3d_dxbc_compiler_emit_dcl_tessellator_domain(compiler, instruction); + break; + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + vkd3d_dxbc_compiler_emit_tessellator_output_primitive(compiler, + instruction->declaration.tessellator_output_primitive); + break; + case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: + vkd3d_dxbc_compiler_emit_tessellator_partitioning(compiler, + instruction->declaration.tessellator_partitioning); + break; + case VKD3DSIH_DCL_THREAD_GROUP: + vkd3d_dxbc_compiler_emit_dcl_thread_group(compiler, instruction); + break; + case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + ret = vkd3d_dxbc_compiler_emit_shader_phase_instance_count(compiler, instruction); + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + vkd3d_dxbc_compiler_enter_shader_phase(compiler, instruction); + break; + case VKD3DSIH_DMOV: + case VKD3DSIH_MOV: + vkd3d_dxbc_compiler_emit_mov(compiler, instruction); + break; + case VKD3DSIH_DMOVC: + case VKD3DSIH_MOVC: + vkd3d_dxbc_compiler_emit_movc(compiler, instruction); + break; + case VKD3DSIH_SWAPC: + vkd3d_dxbc_compiler_emit_swapc(compiler, instruction); + break; + case VKD3DSIH_ADD: + case VKD3DSIH_AND: + case VKD3DSIH_BFREV: + case VKD3DSIH_COUNTBITS: + case VKD3DSIH_DADD: + case VKD3DSIH_DDIV: + case VKD3DSIH_DIV: + case 
VKD3DSIH_DMUL: + case VKD3DSIH_DTOF: + case VKD3DSIH_DTOI: + case VKD3DSIH_DTOU: + case VKD3DSIH_FTOD: + case VKD3DSIH_FTOI: + case VKD3DSIH_FTOU: + case VKD3DSIH_IADD: + case VKD3DSIH_INEG: + case VKD3DSIH_ISHL: + case VKD3DSIH_ISHR: + case VKD3DSIH_ITOD: + case VKD3DSIH_ITOF: + case VKD3DSIH_MUL: + case VKD3DSIH_NOT: + case VKD3DSIH_OR: + case VKD3DSIH_USHR: + case VKD3DSIH_UTOD: + case VKD3DSIH_UTOF: + case VKD3DSIH_XOR: + vkd3d_dxbc_compiler_emit_alu_instruction(compiler, instruction); + break; + case VKD3DSIH_DFMA: + case VKD3DSIH_DMAX: + case VKD3DSIH_DMIN: + case VKD3DSIH_EXP: + case VKD3DSIH_FIRSTBIT_HI: + case VKD3DSIH_FIRSTBIT_LO: + case VKD3DSIH_FIRSTBIT_SHI: + case VKD3DSIH_FRC: + case VKD3DSIH_IMAX: + case VKD3DSIH_IMIN: + case VKD3DSIH_LOG: + case VKD3DSIH_MAD: + case VKD3DSIH_MAX: + case VKD3DSIH_MIN: + case VKD3DSIH_ROUND_NE: + case VKD3DSIH_ROUND_NI: + case VKD3DSIH_ROUND_PI: + case VKD3DSIH_ROUND_Z: + case VKD3DSIH_RSQ: + case VKD3DSIH_SQRT: + case VKD3DSIH_UMAX: + case VKD3DSIH_UMIN: + vkd3d_dxbc_compiler_emit_ext_glsl_instruction(compiler, instruction); + break; + case VKD3DSIH_DP4: + case VKD3DSIH_DP3: + case VKD3DSIH_DP2: + vkd3d_dxbc_compiler_emit_dot(compiler, instruction); + break; + case VKD3DSIH_DRCP: + case VKD3DSIH_RCP: + vkd3d_dxbc_compiler_emit_rcp(compiler, instruction); + break; + case VKD3DSIH_SINCOS: + vkd3d_dxbc_compiler_emit_sincos(compiler, instruction); + break; + case VKD3DSIH_IMUL: + vkd3d_dxbc_compiler_emit_imul(compiler, instruction); + break; + case VKD3DSIH_IMAD: + vkd3d_dxbc_compiler_emit_imad(compiler, instruction); + break; + case VKD3DSIH_UDIV: + vkd3d_dxbc_compiler_emit_udiv(compiler, instruction); + break; + case VKD3DSIH_DEQ: + case VKD3DSIH_DGE: + case VKD3DSIH_DLT: + case VKD3DSIH_DNE: + case VKD3DSIH_EQ: + case VKD3DSIH_GE: + case VKD3DSIH_IEQ: + case VKD3DSIH_IGE: + case VKD3DSIH_ILT: + case VKD3DSIH_INE: + case VKD3DSIH_LT: + case VKD3DSIH_NE: + case VKD3DSIH_UGE: + case VKD3DSIH_ULT: + 
vkd3d_dxbc_compiler_emit_comparison_instruction(compiler, instruction); + break; + case VKD3DSIH_BFI: + case VKD3DSIH_IBFE: + case VKD3DSIH_UBFE: + vkd3d_dxbc_compiler_emit_bitfield_instruction(compiler, instruction); + break; + case VKD3DSIH_F16TOF32: + vkd3d_dxbc_compiler_emit_f16tof32(compiler, instruction); + break; + case VKD3DSIH_F32TOF16: + vkd3d_dxbc_compiler_emit_f32tof16(compiler, instruction); + break; + case VKD3DSIH_BREAK: + case VKD3DSIH_BREAKP: + case VKD3DSIH_CASE: + case VKD3DSIH_CONTINUE: + case VKD3DSIH_CONTINUEP: + case VKD3DSIH_DEFAULT: + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: + case VKD3DSIH_ENDLOOP: + case VKD3DSIH_ENDSWITCH: + case VKD3DSIH_IF: + case VKD3DSIH_LOOP: + case VKD3DSIH_RET: + case VKD3DSIH_RETP: + case VKD3DSIH_SWITCH: + case VKD3DSIH_TEXKILL: + ret = vkd3d_dxbc_compiler_emit_control_flow_instruction(compiler, instruction); + break; + case VKD3DSIH_DSX: + case VKD3DSIH_DSX_COARSE: + case VKD3DSIH_DSX_FINE: + case VKD3DSIH_DSY: + case VKD3DSIH_DSY_COARSE: + case VKD3DSIH_DSY_FINE: + vkd3d_dxbc_compiler_emit_deriv_instruction(compiler, instruction); + break; + case VKD3DSIH_LD2DMS: + case VKD3DSIH_LD: + vkd3d_dxbc_compiler_emit_ld(compiler, instruction); + break; + case VKD3DSIH_LOD: + vkd3d_dxbc_compiler_emit_lod(compiler, instruction); + break; + case VKD3DSIH_SAMPLE: + case VKD3DSIH_SAMPLE_B: + case VKD3DSIH_SAMPLE_GRAD: + case VKD3DSIH_SAMPLE_LOD: + vkd3d_dxbc_compiler_emit_sample(compiler, instruction); + break; + case VKD3DSIH_SAMPLE_C: + case VKD3DSIH_SAMPLE_C_LZ: + vkd3d_dxbc_compiler_emit_sample_c(compiler, instruction); + break; + case VKD3DSIH_GATHER4: + case VKD3DSIH_GATHER4_C: + case VKD3DSIH_GATHER4_PO: + case VKD3DSIH_GATHER4_PO_C: + vkd3d_dxbc_compiler_emit_gather4(compiler, instruction); + break; + case VKD3DSIH_LD_RAW: + case VKD3DSIH_LD_STRUCTURED: + vkd3d_dxbc_compiler_emit_ld_raw_structured(compiler, instruction); + break; + case VKD3DSIH_STORE_RAW: + case VKD3DSIH_STORE_STRUCTURED: + 
vkd3d_dxbc_compiler_emit_store_raw_structured(compiler, instruction); + break; + case VKD3DSIH_LD_UAV_TYPED: + vkd3d_dxbc_compiler_emit_ld_uav_typed(compiler, instruction); + break; + case VKD3DSIH_STORE_UAV_TYPED: + vkd3d_dxbc_compiler_emit_store_uav_typed(compiler, instruction); + break; + case VKD3DSIH_IMM_ATOMIC_ALLOC: + case VKD3DSIH_IMM_ATOMIC_CONSUME: + vkd3d_dxbc_compiler_emit_uav_counter_instruction(compiler, instruction); + break; + case VKD3DSIH_ATOMIC_AND: + case VKD3DSIH_ATOMIC_CMP_STORE: + case VKD3DSIH_ATOMIC_IADD: + case VKD3DSIH_ATOMIC_IMAX: + case VKD3DSIH_ATOMIC_IMIN: + case VKD3DSIH_ATOMIC_OR: + case VKD3DSIH_ATOMIC_UMAX: + case VKD3DSIH_ATOMIC_UMIN: + case VKD3DSIH_ATOMIC_XOR: + case VKD3DSIH_IMM_ATOMIC_AND: + case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: + case VKD3DSIH_IMM_ATOMIC_EXCH: + case VKD3DSIH_IMM_ATOMIC_IADD: + case VKD3DSIH_IMM_ATOMIC_IMAX: + case VKD3DSIH_IMM_ATOMIC_IMIN: + case VKD3DSIH_IMM_ATOMIC_OR: + case VKD3DSIH_IMM_ATOMIC_UMAX: + case VKD3DSIH_IMM_ATOMIC_UMIN: + case VKD3DSIH_IMM_ATOMIC_XOR: + vkd3d_dxbc_compiler_emit_atomic_instruction(compiler, instruction); + break; + case VKD3DSIH_BUFINFO: + vkd3d_dxbc_compiler_emit_bufinfo(compiler, instruction); + break; + case VKD3DSIH_RESINFO: + vkd3d_dxbc_compiler_emit_resinfo(compiler, instruction); + break; + case VKD3DSIH_SAMPLE_INFO: + vkd3d_dxbc_compiler_emit_sample_info(compiler, instruction); + break; + case VKD3DSIH_SAMPLE_POS: + vkd3d_dxbc_compiler_emit_sample_position(compiler, instruction); + break; + case VKD3DSIH_EVAL_CENTROID: + case VKD3DSIH_EVAL_SAMPLE_INDEX: + vkd3d_dxbc_compiler_emit_eval_attrib(compiler, instruction); + break; + case VKD3DSIH_SYNC: + vkd3d_dxbc_compiler_emit_sync(compiler, instruction); + break; + case VKD3DSIH_EMIT: + case VKD3DSIH_EMIT_STREAM: + vkd3d_dxbc_compiler_emit_emit_stream(compiler, instruction); + break; + case VKD3DSIH_CUT: + case VKD3DSIH_CUT_STREAM: + vkd3d_dxbc_compiler_emit_cut_stream(compiler, instruction); + break; + case 
VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_NOP: + /* nothing to do */ + break; + default: + FIXME("Unhandled instruction %#x.\n", instruction->handler_idx); + } + + return ret; +} + +int vkd3d_dxbc_compiler_generate_spirv(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv) +{ + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_phase *phase; + + if ((phase = vkd3d_dxbc_compiler_get_current_shader_phase(compiler))) + vkd3d_dxbc_compiler_leave_shader_phase(compiler, phase); + else + vkd3d_spirv_build_op_function_end(builder); + + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL) + vkd3d_dxbc_compiler_emit_hull_shader_main(compiler); + + if (compiler->shader_type == VKD3D_SHADER_TYPE_DOMAIN) + { + if (info && (ds_info = vkd3d_find_struct(compile_info->next, SPIRV_DOMAIN_SHADER_TARGET_INFO))) + { + vkd3d_dxbc_compiler_emit_tessellator_output_primitive(compiler, ds_info->output_primitive); + vkd3d_dxbc_compiler_emit_tessellator_partitioning(compiler, ds_info->partitioning); + } + else if (vkd3d_dxbc_compiler_is_opengl_target(compiler)) + { + ERR("vkd3d_shader_spirv_domain_shader_target_info is required for " + "OpenGL tessellation evaluation shader.\n"); + } + } + + if (compiler->epilogue_function_id) + { + vkd3d_spirv_build_op_name(builder, compiler->epilogue_function_id, "epilogue"); + vkd3d_dxbc_compiler_emit_shader_epilogue_function(compiler); + } + + if (compiler->strip_debug) + vkd3d_spirv_stream_clear(&builder->debug_stream); + + if (!vkd3d_spirv_compile_module(builder, spirv, vkd3d_dxbc_compiler_get_entry_point_name(compiler))) + return VKD3D_ERROR; + + if (TRACE_ON()) + { + enum vkd3d_shader_spirv_environment environment = 
vkd3d_dxbc_compiler_get_target_environment(compiler); + vkd3d_spirv_dump(spirv, environment); + vkd3d_spirv_validate(spirv, environment); + } + + if (compiler->failed) + return VKD3D_ERROR_INVALID_SHADER; + + if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) + { + struct vkd3d_shader_code text; + enum vkd3d_shader_spirv_environment environment = vkd3d_dxbc_compiler_get_target_environment(compiler); + if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) + return VKD3D_ERROR; + *spirv = text; + } + + return VKD3D_OK; +} + +void vkd3d_dxbc_compiler_destroy(struct vkd3d_dxbc_compiler *compiler) +{ + vkd3d_free(compiler->control_flow_info); + + vkd3d_free(compiler->output_info); + + vkd3d_free(compiler->push_constants); + vkd3d_free(compiler->descriptor_offset_ids); + + vkd3d_spirv_builder_free(&compiler->spirv_builder); + + rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); + + vkd3d_free(compiler->shader_phases); + vkd3d_free(compiler->spec_constants); + + vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); + + vkd3d_free(compiler); +} diff --git a/libs/vkd3d/libs/vkd3d-shader/trace.c b/libs/vkd3d/libs/vkd3d-shader/trace.c new file mode 100644 index 00000000000..c292c847147 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/trace.c @@ -0,0 +1,2008 @@ +/* + * Copyright 2002-2003 Jason Edmeades + * Copyright 2002-2003 Raphael Junqueira + * Copyright 2004 Christian Costa + * Copyright 2005 Oliver Stieber + * Copyright 2006 Ivan Gyurdiev + * Copyright 2007-2008, 2013 Stefan Dösinger for CodeWeavers + * Copyright 2009-2011 Henri Verbeet for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +#include +#include + +static const char * const shader_opcode_names[] = +{ + [VKD3DSIH_ABS ] = "abs", + [VKD3DSIH_ADD ] = "add", + [VKD3DSIH_AND ] = "and", + [VKD3DSIH_ATOMIC_AND ] = "atomic_and", + [VKD3DSIH_ATOMIC_CMP_STORE ] = "atomic_cmp_store", + [VKD3DSIH_ATOMIC_IADD ] = "atomic_iadd", + [VKD3DSIH_ATOMIC_IMAX ] = "atomic_imax", + [VKD3DSIH_ATOMIC_IMIN ] = "atomic_imin", + [VKD3DSIH_ATOMIC_OR ] = "atomic_or", + [VKD3DSIH_ATOMIC_UMAX ] = "atomic_umax", + [VKD3DSIH_ATOMIC_UMIN ] = "atomic_umin", + [VKD3DSIH_ATOMIC_XOR ] = "atomic_xor", + [VKD3DSIH_BEM ] = "bem", + [VKD3DSIH_BFI ] = "bfi", + [VKD3DSIH_BFREV ] = "bfrev", + [VKD3DSIH_BREAK ] = "break", + [VKD3DSIH_BREAKC ] = "breakc", + [VKD3DSIH_BREAKP ] = "breakp", + [VKD3DSIH_BUFINFO ] = "bufinfo", + [VKD3DSIH_CALL ] = "call", + [VKD3DSIH_CALLNZ ] = "callnz", + [VKD3DSIH_CASE ] = "case", + [VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED ] = "check_access_fully_mapped", + [VKD3DSIH_CMP ] = "cmp", + [VKD3DSIH_CND ] = "cnd", + [VKD3DSIH_CONTINUE ] = "continue", + [VKD3DSIH_CONTINUEP ] = "continuec", + [VKD3DSIH_COUNTBITS ] = "countbits", + [VKD3DSIH_CRS ] = "crs", + [VKD3DSIH_CUT ] = "cut", + [VKD3DSIH_CUT_STREAM ] = "cut_stream", + [VKD3DSIH_DADD ] = "dadd", + [VKD3DSIH_DCL ] = "dcl", + [VKD3DSIH_DCL_CONSTANT_BUFFER ] = "dcl_constantBuffer", + [VKD3DSIH_DCL_FUNCTION_BODY ] = "dcl_function_body", + [VKD3DSIH_DCL_FUNCTION_TABLE ] = "dcl_function_table", + [VKD3DSIH_DCL_GLOBAL_FLAGS ] = "dcl_globalFlags", + 
[VKD3DSIH_DCL_GS_INSTANCES ] = "dcl_gs_instances", + [VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT] = "dcl_hs_fork_phase_instance_count", + [VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT] = "dcl_hs_join_phase_instance_count", + [VKD3DSIH_DCL_HS_MAX_TESSFACTOR ] = "dcl_hs_max_tessfactor", + [VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER ] = "dcl_immediateConstantBuffer", + [VKD3DSIH_DCL_INDEX_RANGE ] = "dcl_index_range", + [VKD3DSIH_DCL_INDEXABLE_TEMP ] = "dcl_indexableTemp", + [VKD3DSIH_DCL_INPUT ] = "dcl_input", + [VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT ] = "dcl_input_control_point_count", + [VKD3DSIH_DCL_INPUT_PRIMITIVE ] = "dcl_inputPrimitive", + [VKD3DSIH_DCL_INPUT_PS ] = "dcl_input_ps", + [VKD3DSIH_DCL_INPUT_PS_SGV ] = "dcl_input_ps_sgv", + [VKD3DSIH_DCL_INPUT_PS_SIV ] = "dcl_input_ps_siv", + [VKD3DSIH_DCL_INPUT_SGV ] = "dcl_input_sgv", + [VKD3DSIH_DCL_INPUT_SIV ] = "dcl_input_siv", + [VKD3DSIH_DCL_INTERFACE ] = "dcl_interface", + [VKD3DSIH_DCL_OUTPUT ] = "dcl_output", + [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT ] = "dcl_output_control_point_count", + [VKD3DSIH_DCL_OUTPUT_SIV ] = "dcl_output_siv", + [VKD3DSIH_DCL_OUTPUT_TOPOLOGY ] = "dcl_outputTopology", + [VKD3DSIH_DCL_RESOURCE_RAW ] = "dcl_resource_raw", + [VKD3DSIH_DCL_RESOURCE_STRUCTURED ] = "dcl_resource_structured", + [VKD3DSIH_DCL_SAMPLER ] = "dcl_sampler", + [VKD3DSIH_DCL_STREAM ] = "dcl_stream", + [VKD3DSIH_DCL_TEMPS ] = "dcl_temps", + [VKD3DSIH_DCL_TESSELLATOR_DOMAIN ] = "dcl_tessellator_domain", + [VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE] = "dcl_tessellator_output_primitive", + [VKD3DSIH_DCL_TESSELLATOR_PARTITIONING ] = "dcl_tessellator_partitioning", + [VKD3DSIH_DCL_TGSM_RAW ] = "dcl_tgsm_raw", + [VKD3DSIH_DCL_TGSM_STRUCTURED ] = "dcl_tgsm_structured", + [VKD3DSIH_DCL_THREAD_GROUP ] = "dcl_thread_group", + [VKD3DSIH_DCL_UAV_RAW ] = "dcl_uav_raw", + [VKD3DSIH_DCL_UAV_STRUCTURED ] = "dcl_uav_structured", + [VKD3DSIH_DCL_UAV_TYPED ] = "dcl_uav_typed", + [VKD3DSIH_DCL_VERTICES_OUT ] = "dcl_maxOutputVertexCount", + 
[VKD3DSIH_DDIV ] = "ddiv", + [VKD3DSIH_DEF ] = "def", + [VKD3DSIH_DEFAULT ] = "default", + [VKD3DSIH_DEFB ] = "defb", + [VKD3DSIH_DEFI ] = "defi", + [VKD3DSIH_DEQ ] = "deq", + [VKD3DSIH_DFMA ] = "dfma", + [VKD3DSIH_DGE ] = "dge", + [VKD3DSIH_DIV ] = "div", + [VKD3DSIH_DLT ] = "dlt", + [VKD3DSIH_DMAX ] = "dmax", + [VKD3DSIH_DMIN ] = "dmin", + [VKD3DSIH_DMOV ] = "dmov", + [VKD3DSIH_DMOVC ] = "dmovc", + [VKD3DSIH_DMUL ] = "dmul", + [VKD3DSIH_DNE ] = "dne", + [VKD3DSIH_DP2 ] = "dp2", + [VKD3DSIH_DP2ADD ] = "dp2add", + [VKD3DSIH_DP3 ] = "dp3", + [VKD3DSIH_DP4 ] = "dp4", + [VKD3DSIH_DRCP ] = "drcp", + [VKD3DSIH_DST ] = "dst", + [VKD3DSIH_DSX ] = "dsx", + [VKD3DSIH_DSX_COARSE ] = "deriv_rtx_coarse", + [VKD3DSIH_DSX_FINE ] = "deriv_rtx_fine", + [VKD3DSIH_DSY ] = "dsy", + [VKD3DSIH_DSY_COARSE ] = "deriv_rty_coarse", + [VKD3DSIH_DSY_FINE ] = "deriv_rty_fine", + [VKD3DSIH_DTOF ] = "dtof", + [VKD3DSIH_DTOI ] = "dtoi", + [VKD3DSIH_DTOU ] = "dtou", + [VKD3DSIH_ELSE ] = "else", + [VKD3DSIH_EMIT ] = "emit", + [VKD3DSIH_EMIT_STREAM ] = "emit_stream", + [VKD3DSIH_ENDIF ] = "endif", + [VKD3DSIH_ENDLOOP ] = "endloop", + [VKD3DSIH_ENDREP ] = "endrep", + [VKD3DSIH_ENDSWITCH ] = "endswitch", + [VKD3DSIH_EQ ] = "eq", + [VKD3DSIH_EVAL_CENTROID ] = "eval_centroid", + [VKD3DSIH_EVAL_SAMPLE_INDEX ] = "eval_sample_index", + [VKD3DSIH_EXP ] = "exp", + [VKD3DSIH_EXPP ] = "expp", + [VKD3DSIH_F16TOF32 ] = "f16tof32", + [VKD3DSIH_F32TOF16 ] = "f32tof16", + [VKD3DSIH_FCALL ] = "fcall", + [VKD3DSIH_FIRSTBIT_HI ] = "firstbit_hi", + [VKD3DSIH_FIRSTBIT_LO ] = "firstbit_lo", + [VKD3DSIH_FIRSTBIT_SHI ] = "firstbit_shi", + [VKD3DSIH_FRC ] = "frc", + [VKD3DSIH_FTOD ] = "ftod", + [VKD3DSIH_FTOI ] = "ftoi", + [VKD3DSIH_FTOU ] = "ftou", + [VKD3DSIH_GATHER4 ] = "gather4", + [VKD3DSIH_GATHER4_C ] = "gather4_c", + [VKD3DSIH_GATHER4_C_S ] = "gather4_c_s", + [VKD3DSIH_GATHER4_PO ] = "gather4_po", + [VKD3DSIH_GATHER4_PO_C ] = "gather4_po_c", + [VKD3DSIH_GATHER4_PO_C_S ] = "gather4_po_c_s", + [VKD3DSIH_GATHER4_PO_S ] 
= "gather4_po_s", + [VKD3DSIH_GATHER4_S ] = "gather4_s", + [VKD3DSIH_GE ] = "ge", + [VKD3DSIH_HS_CONTROL_POINT_PHASE ] = "hs_control_point_phase", + [VKD3DSIH_HS_DECLS ] = "hs_decls", + [VKD3DSIH_HS_FORK_PHASE ] = "hs_fork_phase", + [VKD3DSIH_HS_JOIN_PHASE ] = "hs_join_phase", + [VKD3DSIH_IADD ] = "iadd", + [VKD3DSIH_IBFE ] = "ibfe", + [VKD3DSIH_IEQ ] = "ieq", + [VKD3DSIH_IF ] = "if", + [VKD3DSIH_IFC ] = "ifc", + [VKD3DSIH_IGE ] = "ige", + [VKD3DSIH_ILT ] = "ilt", + [VKD3DSIH_IMAD ] = "imad", + [VKD3DSIH_IMAX ] = "imax", + [VKD3DSIH_IMIN ] = "imin", + [VKD3DSIH_IMM_ATOMIC_ALLOC ] = "imm_atomic_alloc", + [VKD3DSIH_IMM_ATOMIC_AND ] = "imm_atomic_and", + [VKD3DSIH_IMM_ATOMIC_CMP_EXCH ] = "imm_atomic_cmp_exch", + [VKD3DSIH_IMM_ATOMIC_CONSUME ] = "imm_atomic_consume", + [VKD3DSIH_IMM_ATOMIC_EXCH ] = "imm_atomic_exch", + [VKD3DSIH_IMM_ATOMIC_IADD ] = "imm_atomic_iadd", + [VKD3DSIH_IMM_ATOMIC_IMAX ] = "imm_atomic_imax", + [VKD3DSIH_IMM_ATOMIC_IMIN ] = "imm_atomic_imin", + [VKD3DSIH_IMM_ATOMIC_OR ] = "imm_atomic_or", + [VKD3DSIH_IMM_ATOMIC_UMAX ] = "imm_atomic_umax", + [VKD3DSIH_IMM_ATOMIC_UMIN ] = "imm_atomic_umin", + [VKD3DSIH_IMM_ATOMIC_XOR ] = "imm_atomic_xor", + [VKD3DSIH_IMUL ] = "imul", + [VKD3DSIH_INE ] = "ine", + [VKD3DSIH_INEG ] = "ineg", + [VKD3DSIH_ISHL ] = "ishl", + [VKD3DSIH_ISHR ] = "ishr", + [VKD3DSIH_ITOD ] = "itod", + [VKD3DSIH_ITOF ] = "itof", + [VKD3DSIH_LABEL ] = "label", + [VKD3DSIH_LD ] = "ld", + [VKD3DSIH_LD2DMS ] = "ld2dms", + [VKD3DSIH_LD2DMS_S ] = "ld2dms_s", + [VKD3DSIH_LD_RAW ] = "ld_raw", + [VKD3DSIH_LD_RAW_S ] = "ld_raw_s", + [VKD3DSIH_LD_S ] = "ld_s", + [VKD3DSIH_LD_STRUCTURED ] = "ld_structured", + [VKD3DSIH_LD_STRUCTURED_S ] = "ld_structured_s", + [VKD3DSIH_LD_UAV_TYPED ] = "ld_uav_typed", + [VKD3DSIH_LD_UAV_TYPED_S ] = "ld_uav_typed_s", + [VKD3DSIH_LIT ] = "lit", + [VKD3DSIH_LOD ] = "lod", + [VKD3DSIH_LOG ] = "log", + [VKD3DSIH_LOGP ] = "logp", + [VKD3DSIH_LOOP ] = "loop", + [VKD3DSIH_LRP ] = "lrp", + [VKD3DSIH_LT ] = "lt", + 
[VKD3DSIH_M3x2 ] = "m3x2", + [VKD3DSIH_M3x3 ] = "m3x3", + [VKD3DSIH_M3x4 ] = "m3x4", + [VKD3DSIH_M4x3 ] = "m4x3", + [VKD3DSIH_M4x4 ] = "m4x4", + [VKD3DSIH_MAD ] = "mad", + [VKD3DSIH_MAX ] = "max", + [VKD3DSIH_MIN ] = "min", + [VKD3DSIH_MOV ] = "mov", + [VKD3DSIH_MOVA ] = "mova", + [VKD3DSIH_MOVC ] = "movc", + [VKD3DSIH_MUL ] = "mul", + [VKD3DSIH_NE ] = "ne", + [VKD3DSIH_NOP ] = "nop", + [VKD3DSIH_NOT ] = "not", + [VKD3DSIH_NRM ] = "nrm", + [VKD3DSIH_OR ] = "or", + [VKD3DSIH_PHASE ] = "phase", + [VKD3DSIH_POW ] = "pow", + [VKD3DSIH_RCP ] = "rcp", + [VKD3DSIH_REP ] = "rep", + [VKD3DSIH_RESINFO ] = "resinfo", + [VKD3DSIH_RET ] = "ret", + [VKD3DSIH_RETP ] = "retp", + [VKD3DSIH_ROUND_NE ] = "round_ne", + [VKD3DSIH_ROUND_NI ] = "round_ni", + [VKD3DSIH_ROUND_PI ] = "round_pi", + [VKD3DSIH_ROUND_Z ] = "round_z", + [VKD3DSIH_RSQ ] = "rsq", + [VKD3DSIH_SAMPLE ] = "sample", + [VKD3DSIH_SAMPLE_B ] = "sample_b", + [VKD3DSIH_SAMPLE_B_CL_S ] = "sample_b_cl_s", + [VKD3DSIH_SAMPLE_C ] = "sample_c", + [VKD3DSIH_SAMPLE_C_CL_S ] = "sample_c_cl_s", + [VKD3DSIH_SAMPLE_C_LZ ] = "sample_c_lz", + [VKD3DSIH_SAMPLE_C_LZ_S ] = "sample_c_lz_s", + [VKD3DSIH_SAMPLE_CL_S ] = "sample_cl_s", + [VKD3DSIH_SAMPLE_GRAD ] = "sample_d", + [VKD3DSIH_SAMPLE_GRAD_CL_S ] = "sample_d_cl_s", + [VKD3DSIH_SAMPLE_INFO ] = "sample_info", + [VKD3DSIH_SAMPLE_LOD ] = "sample_l", + [VKD3DSIH_SAMPLE_LOD_S ] = "sample_l_s", + [VKD3DSIH_SAMPLE_POS ] = "sample_pos", + [VKD3DSIH_SETP ] = "setp", + [VKD3DSIH_SGE ] = "sge", + [VKD3DSIH_SGN ] = "sgn", + [VKD3DSIH_SINCOS ] = "sincos", + [VKD3DSIH_SLT ] = "slt", + [VKD3DSIH_SQRT ] = "sqrt", + [VKD3DSIH_STORE_RAW ] = "store_raw", + [VKD3DSIH_STORE_STRUCTURED ] = "store_structured", + [VKD3DSIH_STORE_UAV_TYPED ] = "store_uav_typed", + [VKD3DSIH_SUB ] = "sub", + [VKD3DSIH_SWAPC ] = "swapc", + [VKD3DSIH_SWITCH ] = "switch", + [VKD3DSIH_SYNC ] = "sync", + [VKD3DSIH_TEX ] = "texld", + [VKD3DSIH_TEXBEM ] = "texbem", + [VKD3DSIH_TEXBEML ] = "texbeml", + [VKD3DSIH_TEXCOORD ] = "texcrd", 
+ [VKD3DSIH_TEXDEPTH ] = "texdepth", + [VKD3DSIH_TEXDP3 ] = "texdp3", + [VKD3DSIH_TEXDP3TEX ] = "texdp3tex", + [VKD3DSIH_TEXKILL ] = "texkill", + [VKD3DSIH_TEXLDD ] = "texldd", + [VKD3DSIH_TEXLDL ] = "texldl", + [VKD3DSIH_TEXM3x2DEPTH ] = "texm3x2depth", + [VKD3DSIH_TEXM3x2PAD ] = "texm3x2pad", + [VKD3DSIH_TEXM3x2TEX ] = "texm3x2tex", + [VKD3DSIH_TEXM3x3 ] = "texm3x3", + [VKD3DSIH_TEXM3x3DIFF ] = "texm3x3diff", + [VKD3DSIH_TEXM3x3PAD ] = "texm3x3pad", + [VKD3DSIH_TEXM3x3SPEC ] = "texm3x3spec", + [VKD3DSIH_TEXM3x3TEX ] = "texm3x3tex", + [VKD3DSIH_TEXM3x3VSPEC ] = "texm3x3vspec", + [VKD3DSIH_TEXREG2AR ] = "texreg2ar", + [VKD3DSIH_TEXREG2GB ] = "texreg2gb", + [VKD3DSIH_TEXREG2RGB ] = "texreg2rgb", + [VKD3DSIH_UBFE ] = "ubfe", + [VKD3DSIH_UDIV ] = "udiv", + [VKD3DSIH_UGE ] = "uge", + [VKD3DSIH_ULT ] = "ult", + [VKD3DSIH_UMAX ] = "umax", + [VKD3DSIH_UMIN ] = "umin", + [VKD3DSIH_UMUL ] = "umul", + [VKD3DSIH_USHR ] = "ushr", + [VKD3DSIH_UTOD ] = "utod", + [VKD3DSIH_UTOF ] = "utof", + [VKD3DSIH_XOR ] = "xor", +}; + +static const struct +{ + enum vkd3d_shader_input_sysval_semantic sysval_semantic; + const char *sysval_name; +} +shader_input_sysval_semantic_names[] = +{ + {VKD3D_SIV_POSITION, "position"}, + {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, + {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, + {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, + {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, + {VKD3D_SIV_VERTEX_ID, "vertex_id"}, + {VKD3D_SIV_INSTANCE_ID, "instance_id"}, + {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, + {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, + {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, + {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, + {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, + {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, + {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, + {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, + 
{VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, + {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, + {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, + {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, + {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, + {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, + {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, +}; + +struct vkd3d_d3d_asm_colours +{ + const char *reset; + const char *error; + const char *literal; + const char *modifier; + const char *opcode; + const char *reg; + const char *swizzle; + const char *version; + const char *write_mask; +}; + +struct vkd3d_d3d_asm_compiler +{ + struct vkd3d_string_buffer buffer; + struct vkd3d_shader_version shader_version; + struct vkd3d_d3d_asm_colours colours; +}; + +static int shader_ver_ge(const struct vkd3d_shader_version *v, int major, int minor) +{ + return v->major > major || (v->major == major && v->minor >= minor); +} + +static int VKD3D_PRINTF_FUNC(2, 3) shader_addline(struct vkd3d_string_buffer *buffer, const char *format, ...) +{ + va_list args; + int ret; + + va_start(args, format); + ret = vkd3d_string_buffer_vprintf(buffer, format, args); + va_end(args); + + return ret; +} + +/* Convert floating point offset relative to a register file to an absolute + * offset for float constants. 
 */
static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx)
{
    switch (register_type)
    {
        /* c0-c2047, c2048-..., etc.: each legacy constant file is a
         * 2048-register window in the combined float constant space. */
        case VKD3DSPR_CONST: return register_idx;
        case VKD3DSPR_CONST2: return 2048 + register_idx;
        case VKD3DSPR_CONST3: return 4096 + register_idx;
        case VKD3DSPR_CONST4: return 6144 + register_idx;
        default:
            FIXME("Unsupported register type: %u.\n", register_type);
            return register_idx;
    }
}

/* Print the names of the set global flag bits, separated by " | "; any bits
 * without a known name are collected into a final "unknown_flags(...)". */
static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t global_flags)
{
    unsigned int i;

    static const struct
    {
        unsigned int flag;
        const char *name;
    }
    global_flag_info[] =
    {
        {VKD3DSGF_REFACTORING_ALLOWED, "refactoringAllowed"},
        {VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL, "forceEarlyDepthStencil"},
        {VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS, "enableRawAndStructuredBuffers"},
        {VKD3DSGF_ENABLE_MINIMUM_PRECISION, "enableMinimumPrecision"},
        {VKD3DSGF_SKIP_OPTIMIZATION, "skipOptimization"},
        {VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS, "enableDoublePrecisionFloatOps"},
        {VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS, "enable11_1DoubleExtensions"},
    };

    for (i = 0; i < ARRAY_SIZE(global_flag_info); ++i)
    {
        if (global_flags & global_flag_info[i].flag)
        {
            vkd3d_string_buffer_printf(&compiler->buffer, "%s", global_flag_info[i].name);
            global_flags &= ~global_flag_info[i].flag;
            /* Separator only when more (known or unknown) bits remain. */
            if (global_flags)
                vkd3d_string_buffer_printf(&compiler->buffer, " | ");
        }
    }

    if (global_flags)
        vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#x)", global_flags);
}

/* Print "sync" instruction flag suffixes (_uglobal/_g/_t). */
static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags)
{
    if (sync_flags & VKD3DSSF_GLOBAL_UAV)
    {
        vkd3d_string_buffer_printf(&compiler->buffer, "_uglobal");
        sync_flags &= ~VKD3DSSF_GLOBAL_UAV;
    }
    if (sync_flags & VKD3DSSF_GROUP_SHARED_MEMORY)
    {
        vkd3d_string_buffer_printf(&compiler->buffer, "_g");
        sync_flags &= ~VKD3DSSF_GROUP_SHARED_MEMORY;
    }
    if (sync_flags & VKD3DSSF_THREAD_GROUP)
    {
        vkd3d_string_buffer_printf(&compiler->buffer, "_t");
        sync_flags &= ~VKD3DSSF_THREAD_GROUP;
    }

    if (sync_flags)
        vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", sync_flags);
}

/* Print the " [precise]" / " [precise(xyzw)]" instruction suffix. */
static void shader_dump_precise_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t flags)
{
    if (!(flags & VKD3DSI_PRECISE_XYZW))
        return;

    vkd3d_string_buffer_printf(&compiler->buffer, " [precise");
    /* Only spell out the components when it's not all four of them. */
    if (flags != VKD3DSI_PRECISE_XYZW)
    {
        vkd3d_string_buffer_printf(&compiler->buffer, "(%s%s%s%s)",
                flags & VKD3DSI_PRECISE_X ? "x" : "",
                flags & VKD3DSI_PRECISE_Y ? "y" : "",
                flags & VKD3DSI_PRECISE_Z ? "z" : "",
                flags & VKD3DSI_PRECISE_W ? "w" : "");
    }
    vkd3d_string_buffer_printf(&compiler->buffer, "]");
}

/* Print UAV declaration flag suffixes (_glc/_opc). */
static void shader_dump_uav_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t uav_flags)
{
    if (uav_flags & VKD3DSUF_GLOBALLY_COHERENT)
    {
        vkd3d_string_buffer_printf(&compiler->buffer, "_glc");
        uav_flags &= ~VKD3DSUF_GLOBALLY_COHERENT;
    }
    if (uav_flags & VKD3DSUF_ORDER_PRESERVING_COUNTER)
    {
        vkd3d_string_buffer_printf(&compiler->buffer, "_opc");
        uav_flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER;
    }

    if (uav_flags)
        vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", uav_flags);
}

/* Print the "domain_<x>" token of a tessellator domain declaration. */
static void shader_dump_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler,
        enum vkd3d_tessellator_domain domain)
{
    struct vkd3d_string_buffer *buffer = &compiler->buffer;

    shader_addline(buffer, "domain_");
    switch (domain)
    {
        case VKD3D_TESSELLATOR_DOMAIN_LINE:
            shader_addline(buffer, "isoline");
            break;
        case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE:
            shader_addline(buffer, "tri");
            break;
        case VKD3D_TESSELLATOR_DOMAIN_QUAD:
            shader_addline(buffer, "quad");
            break;
        default:
            shader_addline(buffer, "unknown_tessellator_domain(%#x)", domain);
            break;
    }
}

/* Print the "output_<primitive>" token of a tessellator output-primitive
 * declaration. */
static void shader_dump_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler
*compiler,
        enum vkd3d_shader_tessellator_output_primitive output_primitive)
{
    struct vkd3d_string_buffer *buffer = &compiler->buffer;

    shader_addline(buffer, "output_");
    switch (output_primitive)
    {
        case VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT:
            shader_addline(buffer, "point");
            break;
        case VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE:
            shader_addline(buffer, "line");
            break;
        case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW:
            shader_addline(buffer, "triangle_cw");
            break;
        case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
            shader_addline(buffer, "triangle_ccw");
            break;
        default:
            shader_addline(buffer, "unknown_tessellator_output_primitive(%#x)", output_primitive);
            break;
    }
}

/* Print the "partitioning_<mode>" token of a tessellator partitioning
 * declaration. */
static void shader_dump_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler,
        enum vkd3d_shader_tessellator_partitioning partitioning)
{
    struct vkd3d_string_buffer *buffer = &compiler->buffer;

    shader_addline(buffer, "partitioning_");
    switch (partitioning)
    {
        case VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER:
            shader_addline(buffer, "integer");
            break;
        case VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2:
            shader_addline(buffer, "pow2");
            break;
        case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
            shader_addline(buffer, "fractional_odd");
            break;
        case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
            shader_addline(buffer, "fractional_even");
            break;
        default:
            shader_addline(buffer, "unknown_tessellator_partitioning(%#x)", partitioning);
            break;
    }
}

/* Print the name of an input system-value semantic, via a linear lookup in
 * shader_input_sysval_semantic_names[]. */
static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler,
        enum vkd3d_shader_input_sysval_semantic semantic)
{
    unsigned int i;

    for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i)
    {
        if (shader_input_sysval_semantic_names[i].sysval_semantic == semantic)
        {
            vkd3d_string_buffer_printf(&compiler->buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name);
            return;
        }
    }

    vkd3d_string_buffer_printf(&compiler->buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic);
}

/* Print the resource-type part of a resource/UAV declaration. */
static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_resource_type type)
{
    /* Indexed directly by enum vkd3d_shader_resource_type. */
    static const char *const resource_type_names[] =
    {
        /* VKD3D_SHADER_RESOURCE_NONE */              "none",
        /* VKD3D_SHADER_RESOURCE_BUFFER */            "buffer",
        /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */        "texture1d",
        /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */        "texture2d",
        /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */      "texture2dms",
        /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */        "texture3d",
        /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */      "texturecube",
        /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */   "texture1darray",
        /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */   "texture2darray",
        /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ "texture2dmsarray",
        /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ "texturecubearray",
    };

    if (type < ARRAY_SIZE(resource_type_names))
        vkd3d_string_buffer_printf(&compiler->buffer, "%s", resource_type_names[type]);
    else
        vkd3d_string_buffer_printf(&compiler->buffer, "unknown");
}

/* Print the "(t0,t1,t2,t3)" data-type tuple of a resource declaration;
 * "type" points at four component data types. */
static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, const enum vkd3d_data_type *type)
{
    /* Indexed directly by enum vkd3d_data_type. */
    static const char *const data_type_names[] =
    {
        /* VKD3D_DATA_FLOAT */     "float",
        /* VKD3D_DATA_INT */       "int",
        /* VKD3D_DATA_RESOURCE */  "resource",
        /* VKD3D_DATA_SAMPLER */   "sampler",
        /* VKD3D_DATA_UAV */       "uav",
        /* VKD3D_DATA_UINT */      "uint",
        /* VKD3D_DATA_UNORM */     "unorm",
        /* VKD3D_DATA_SNORM */     "snorm",
        /* VKD3D_DATA_OPAQUE */    "opaque",
        /* VKD3D_DATA_MIXED */     "mixed",
        /* VKD3D_DATA_DOUBLE */    "double",
        /* VKD3D_DATA_CONTINUED */ "",
        /* VKD3D_DATA_UNUSED */    "",
    };
    const char *name;
    int i;

    vkd3d_string_buffer_printf(&compiler->buffer, "(");

    for (i = 0; i < 4; i++)
    {
        if (type[i] < ARRAY_SIZE(data_type_names))
            name = data_type_names[type[i]];
        else
            name = "unknown";
vkd3d_string_buffer_printf(&compiler->buffer, "%s%s", i == 0 ? "" : ",", name); + } + + vkd3d_string_buffer_printf(&compiler->buffer, ")"); +} + +static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_semantic *semantic, uint32_t flags) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + + if (semantic->resource.reg.reg.type == VKD3DSPR_SAMPLER) + { + switch (semantic->resource_type) + { + case VKD3D_SHADER_RESOURCE_TEXTURE_2D: + shader_addline(buffer, "_2d"); + break; + + case VKD3D_SHADER_RESOURCE_TEXTURE_3D: + shader_addline(buffer, "_3d"); + break; + + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: + shader_addline(buffer, "_cube"); + break; + + default: + shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type); + break; + } + } + else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) + { + if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) + shader_addline(buffer, "_resource_"); + + shader_dump_resource_type(compiler, semantic->resource_type); + if (semantic->resource.reg.reg.type == VKD3DSPR_UAV) + shader_dump_uav_flags(compiler, flags); + shader_dump_data_type(compiler, semantic->resource_data_type); + } + else + { + /* Pixel shaders 3.0 don't have usage semantics. 
*/ + if (!shader_ver_ge(&compiler->shader_version, 3, 0) + && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) + return; + else + shader_addline(buffer, "_"); + + switch (semantic->usage) + { + case VKD3D_DECL_USAGE_POSITION: + shader_addline(buffer, "position%u", semantic->usage_idx); + break; + + case VKD3D_DECL_USAGE_BLEND_INDICES: + shader_addline(buffer, "blend"); + break; + + case VKD3D_DECL_USAGE_BLEND_WEIGHT: + shader_addline(buffer, "weight"); + break; + + case VKD3D_DECL_USAGE_NORMAL: + shader_addline(buffer, "normal%u", semantic->usage_idx); + break; + + case VKD3D_DECL_USAGE_PSIZE: + shader_addline(buffer, "psize"); + break; + + case VKD3D_DECL_USAGE_COLOR: + if (!semantic->usage_idx) + shader_addline(buffer, "color"); + else + shader_addline(buffer, "specular%u", (semantic->usage_idx - 1)); + break; + + case VKD3D_DECL_USAGE_TEXCOORD: + shader_addline(buffer, "texture%u", semantic->usage_idx); + break; + + case VKD3D_DECL_USAGE_TANGENT: + shader_addline(buffer, "tangent"); + break; + + case VKD3D_DECL_USAGE_BINORMAL: + shader_addline(buffer, "binormal"); + break; + + case VKD3D_DECL_USAGE_TESS_FACTOR: + shader_addline(buffer, "tessfactor"); + break; + + case VKD3D_DECL_USAGE_POSITIONT: + shader_addline(buffer, "positionT%u", semantic->usage_idx); + break; + + case VKD3D_DECL_USAGE_FOG: + shader_addline(buffer, "fog"); + break; + + case VKD3D_DECL_USAGE_DEPTH: + shader_addline(buffer, "depth"); + break; + + case VKD3D_DECL_USAGE_SAMPLE: + shader_addline(buffer, "sample"); + break; + + default: + shader_addline(buffer, "", semantic->usage); + FIXME("Unrecognised semantic usage %#x.\n", semantic->usage); + } + } +} + +static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_src_param *param); + +static void shader_print_float_literal(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, float f, const char *suffix) +{ + const char *sign = ""; + + if (isfinite(f) && signbit(f)) + { + sign = 
"-";
        f = -f;
    }

    vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", prefix, sign, compiler->colours.literal);
    vkd3d_string_buffer_print_f32(&compiler->buffer, f);
    vkd3d_string_buffer_printf(&compiler->buffer, "%s%s", compiler->colours.reset, suffix);
}

/* Print a double literal; same sign handling as floats, with a trailing
 * 'l' type suffix. */
static void shader_print_double_literal(struct vkd3d_d3d_asm_compiler *compiler,
        const char *prefix, double d, const char *suffix)
{
    const char *sign = "";

    if (isfinite(d) && signbit(d))
    {
        sign = "-";
        d = -d;
    }

    vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", prefix, sign, compiler->colours.literal);
    vkd3d_string_buffer_print_f64(&compiler->buffer, d);
    vkd3d_string_buffer_printf(&compiler->buffer, "l%s%s", compiler->colours.reset, suffix);
}

/* Print a signed int literal, keeping the '-' outside the colour escape. */
static void shader_print_int_literal(struct vkd3d_d3d_asm_compiler *compiler,
        const char *prefix, int i, const char *suffix)
{
    if (i < 0)
        vkd3d_string_buffer_printf(&compiler->buffer, "%s-%s%d%s%s",
                prefix, compiler->colours.literal, -i, compiler->colours.reset, suffix);
    else
        vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%d%s%s",
                prefix, compiler->colours.literal, i, compiler->colours.reset, suffix);
}

/* Print an unsigned int literal in decimal. */
static void shader_print_uint_literal(struct vkd3d_d3d_asm_compiler *compiler,
        const char *prefix, unsigned int i, const char *suffix)
{
    vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%u%s%s",
            prefix, compiler->colours.literal, i, compiler->colours.reset, suffix);
}

/* Print an unsigned int literal as zero-padded hex. */
static void shader_print_hex_literal(struct vkd3d_d3d_asm_compiler *compiler,
        const char *prefix, unsigned int i, const char *suffix)
{
    vkd3d_string_buffer_printf(&compiler->buffer, "%s%s0x%08x%s%s",
            prefix, compiler->colours.literal, i, compiler->colours.reset, suffix);
}

/* Print a boolean literal as "true"/"false". */
static void shader_print_bool_literal(struct vkd3d_d3d_asm_compiler *compiler,
        const char *prefix, unsigned int b, const char *suffix)
{
    vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s%s%s", prefix,
            compiler->colours.literal, b ? "true" : "false", compiler->colours.reset, suffix);
}

/* Print a "[rel_addr + offset]" or "[offset]" register subscript. */
static void shader_print_subscript(struct vkd3d_d3d_asm_compiler *compiler,
        unsigned int offset, const struct vkd3d_shader_src_param *rel_addr)
{
    vkd3d_string_buffer_printf(&compiler->buffer, "[");
    if (rel_addr)
    {
        shader_dump_src_param(compiler, rel_addr);
        vkd3d_string_buffer_printf(&compiler->buffer, " + ");
    }
    shader_print_uint_literal(compiler, "", offset, "]");
}

/* Print a "[first:last]" range subscript; ~0u as the last offset means an
 * unbounded range and prints as "[first:*]". */
static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler,
        unsigned int offset_first, unsigned int offset_last)
{
    shader_print_uint_literal(compiler, "[", offset_first, ":");
    if (offset_last != ~0u)
        shader_print_uint_literal(compiler, "", offset_last, "]");
    else
        vkd3d_string_buffer_printf(&compiler->buffer, "*]");
}

/* Print a register reference: the register-file prefix, any immediate
 * constant payload, and the index subscripts.  "is_declaration" selects
 * sm 5.1 range syntax for descriptor declarations. */
static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg,
        bool is_declaration)
{
    struct vkd3d_string_buffer *buffer = &compiler->buffer;
    unsigned int offset = reg->idx[0].offset;
    bool is_descriptor = false;

    static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"};
    static const char * const misctype_reg_names[] = {"vPos", "vFace"};

    shader_addline(buffer, "%s", compiler->colours.reg);
    switch (reg->type)
    {
        case VKD3DSPR_TEMP:
            shader_addline(buffer, "r");
            break;

        case VKD3DSPR_INPUT:
            shader_addline(buffer, "v");
            break;

        case VKD3DSPR_CONST:
        case VKD3DSPR_CONST2:
        case VKD3DSPR_CONST3:
        case VKD3DSPR_CONST4:
            shader_addline(buffer, "c");
            /* Legacy float constant files map into one "c" space. */
            offset = shader_get_float_offset(reg->type, offset);
            break;

        case VKD3DSPR_TEXTURE: /* vs: case VKD3DSPR_ADDR */
            vkd3d_string_buffer_printf(buffer, "%c",
                    compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ?
't' : 'a'); + break; + + case VKD3DSPR_RASTOUT: + shader_addline(buffer, "%s", rastout_reg_names[offset]); + break; + + case VKD3DSPR_COLOROUT: + shader_addline(buffer, "o"); + if (!shader_ver_ge(&compiler->shader_version, 4, 0)) + shader_addline(buffer, "C"); + break; + + case VKD3DSPR_DEPTHOUT: + shader_addline(buffer, "oDepth"); + break; + + case VKD3DSPR_DEPTHOUTGE: + shader_addline(buffer, "oDepthGE"); + break; + + case VKD3DSPR_DEPTHOUTLE: + shader_addline(buffer, "oDepthLE"); + break; + + case VKD3DSPR_ATTROUT: + shader_addline(buffer, "oD"); + break; + + case VKD3DSPR_TEXCRDOUT: + /* Vertex shaders >= 3.0 use general purpose output registers + * (VKD3DSPR_OUTPUT), which can include an address token. */ + if (shader_ver_ge(&compiler->shader_version, 3, 0)) + shader_addline(buffer, "o"); + else + shader_addline(buffer, "oT"); + break; + + case VKD3DSPR_CONSTINT: + shader_addline(buffer, "i"); + break; + + case VKD3DSPR_CONSTBOOL: + shader_addline(buffer, "b"); + break; + + case VKD3DSPR_LABEL: + shader_addline(buffer, "l"); + break; + + case VKD3DSPR_LOOP: + shader_addline(buffer, "aL"); + break; + + case VKD3DSPR_SAMPLER: + shader_addline(buffer, "s"); + is_descriptor = true; + break; + + case VKD3DSPR_MISCTYPE: + if (offset > 1) + { + FIXME("Unhandled misctype register %u.\n", offset); + shader_addline(buffer, "", offset); + } + else + { + shader_addline(buffer, "%s", misctype_reg_names[offset]); + } + break; + + case VKD3DSPR_PREDICATE: + shader_addline(buffer, "p"); + break; + + case VKD3DSPR_IMMCONST: + shader_addline(buffer, "l"); + break; + + case VKD3DSPR_IMMCONST64: + shader_addline(buffer, "d"); + break; + + case VKD3DSPR_CONSTBUFFER: + shader_addline(buffer, "cb"); + is_descriptor = true; + break; + + case VKD3DSPR_IMMCONSTBUFFER: + shader_addline(buffer, "icb"); + break; + + case VKD3DSPR_PRIMID: + shader_addline(buffer, "primID"); + break; + + case VKD3DSPR_NULL: + shader_addline(buffer, "null"); + break; + + case VKD3DSPR_RASTERIZER: + 
shader_addline(buffer, "rasterizer"); + break; + + case VKD3DSPR_RESOURCE: + shader_addline(buffer, "t"); + is_descriptor = true; + break; + + case VKD3DSPR_UAV: + shader_addline(buffer, "u"); + is_descriptor = true; + break; + + case VKD3DSPR_OUTPOINTID: + shader_addline(buffer, "vOutputControlPointID"); + break; + + case VKD3DSPR_FORKINSTID: + shader_addline(buffer, "vForkInstanceId"); + break; + + case VKD3DSPR_JOININSTID: + shader_addline(buffer, "vJoinInstanceId"); + break; + + case VKD3DSPR_INCONTROLPOINT: + shader_addline(buffer, "vicp"); + break; + + case VKD3DSPR_OUTCONTROLPOINT: + shader_addline(buffer, "vocp"); + break; + + case VKD3DSPR_PATCHCONST: + shader_addline(buffer, "vpc"); + break; + + case VKD3DSPR_TESSCOORD: + shader_addline(buffer, "vDomainLocation"); + break; + + case VKD3DSPR_GROUPSHAREDMEM: + shader_addline(buffer, "g"); + break; + + case VKD3DSPR_THREADID: + shader_addline(buffer, "vThreadID"); + break; + + case VKD3DSPR_THREADGROUPID: + shader_addline(buffer, "vThreadGroupID"); + break; + + case VKD3DSPR_LOCALTHREADID: + shader_addline(buffer, "vThreadIDInGroup"); + break; + + case VKD3DSPR_LOCALTHREADINDEX: + shader_addline(buffer, "vThreadIDInGroupFlattened"); + break; + + case VKD3DSPR_IDXTEMP: + shader_addline(buffer, "x"); + break; + + case VKD3DSPR_STREAM: + shader_addline(buffer, "m"); + break; + + case VKD3DSPR_FUNCTIONBODY: + shader_addline(buffer, "fb"); + break; + + case VKD3DSPR_FUNCTIONPOINTER: + shader_addline(buffer, "fp"); + break; + + case VKD3DSPR_COVERAGE: + shader_addline(buffer, "vCoverage"); + break; + + case VKD3DSPR_SAMPLEMASK: + shader_addline(buffer, "oMask"); + break; + + case VKD3DSPR_GSINSTID: + shader_addline(buffer, "vGSInstanceID"); + break; + + case VKD3DSPR_OUTSTENCILREF: + shader_addline(buffer, "oStencilRef"); + break; + + default: + shader_addline(buffer, "", reg->type); + break; + } + + if (reg->type == VKD3DSPR_IMMCONST) + { + shader_addline(buffer, "%s(", compiler->colours.reset); + switch 
(reg->immconst_type) + { + case VKD3D_IMMCONST_SCALAR: + switch (reg->data_type) + { + case VKD3D_DATA_FLOAT: + shader_print_float_literal(compiler, "", reg->u.immconst_float[0], ""); + break; + case VKD3D_DATA_INT: + shader_print_int_literal(compiler, "", reg->u.immconst_uint[0], ""); + break; + case VKD3D_DATA_RESOURCE: + case VKD3D_DATA_SAMPLER: + case VKD3D_DATA_UINT: + shader_print_uint_literal(compiler, "", reg->u.immconst_uint[0], ""); + break; + default: + shader_addline(buffer, "", reg->data_type); + break; + } + break; + + case VKD3D_IMMCONST_VEC4: + switch (reg->data_type) + { + case VKD3D_DATA_FLOAT: + shader_print_float_literal(compiler, "", reg->u.immconst_float[0], ""); + shader_print_float_literal(compiler, ", ", reg->u.immconst_float[1], ""); + shader_print_float_literal(compiler, ", ", reg->u.immconst_float[2], ""); + shader_print_float_literal(compiler, ", ", reg->u.immconst_float[3], ""); + break; + case VKD3D_DATA_INT: + shader_print_int_literal(compiler, "", reg->u.immconst_uint[0], ""); + shader_print_int_literal(compiler, ", ", reg->u.immconst_uint[1], ""); + shader_print_int_literal(compiler, ", ", reg->u.immconst_uint[2], ""); + shader_print_int_literal(compiler, ", ", reg->u.immconst_uint[3], ""); + break; + case VKD3D_DATA_RESOURCE: + case VKD3D_DATA_SAMPLER: + case VKD3D_DATA_UINT: + shader_print_uint_literal(compiler, "", reg->u.immconst_uint[0], ""); + shader_print_uint_literal(compiler, ", ", reg->u.immconst_uint[1], ""); + shader_print_uint_literal(compiler, ", ", reg->u.immconst_uint[2], ""); + shader_print_uint_literal(compiler, ", ", reg->u.immconst_uint[3], ""); + break; + default: + shader_addline(buffer, "", reg->data_type); + break; + } + break; + + default: + shader_addline(buffer, "", reg->immconst_type); + break; + } + shader_addline(buffer, ")"); + } + else if (reg->type == VKD3DSPR_IMMCONST64) + { + shader_addline(buffer, "%s(", compiler->colours.reset); + /* A double2 vector is treated as a float4 vector in enum 
vkd3d_immconst_type. */ + if (reg->immconst_type == VKD3D_IMMCONST_SCALAR || reg->immconst_type == VKD3D_IMMCONST_VEC4) + { + if (reg->data_type == VKD3D_DATA_DOUBLE) + { + shader_print_double_literal(compiler, "", reg->u.immconst_double[0], ""); + if (reg->immconst_type == VKD3D_IMMCONST_VEC4) + shader_print_double_literal(compiler, ", ", reg->u.immconst_double[1], ""); + } + else + { + shader_addline(buffer, "", reg->data_type); + } + } + else + { + shader_addline(buffer, "", reg->immconst_type); + } + shader_addline(buffer, ")"); + } + else if (reg->type != VKD3DSPR_RASTOUT + && reg->type != VKD3DSPR_MISCTYPE + && reg->type != VKD3DSPR_NULL) + { + if (offset != ~0u) + { + bool is_sm_5_1 = shader_ver_ge(&compiler->shader_version, 5, 1); + + if (reg->idx[0].rel_addr || reg->type == VKD3DSPR_IMMCONSTBUFFER + || reg->type == VKD3DSPR_INCONTROLPOINT || (reg->type == VKD3DSPR_INPUT + && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY + || compiler->shader_version.type == VKD3D_SHADER_TYPE_HULL))) + { + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); + shader_print_subscript(compiler, offset, reg->idx[0].rel_addr); + } + else + { + vkd3d_string_buffer_printf(buffer, "%u%s", offset, compiler->colours.reset); + } + + /* For sm 5.1 descriptor declarations we need to print the register range instead of + * a single register index. */ + if (is_descriptor && is_declaration && is_sm_5_1) + { + shader_print_subscript_range(compiler, reg->idx[1].offset, reg->idx[2].offset); + } + else + { + /* For descriptors in sm < 5.1 we move the reg->idx values up one slot + * to normalise with 5.1. + * Here we should ignore it if it's a descriptor in sm < 5.1. 
*/ + if (reg->idx[1].offset != ~0u && (!is_descriptor || is_sm_5_1)) + shader_print_subscript(compiler, reg->idx[1].offset, reg->idx[1].rel_addr); + + if (reg->idx[2].offset != ~0u) + shader_print_subscript(compiler, reg->idx[2].offset, reg->idx[2].rel_addr); + } + } + else + { + shader_addline(buffer, "%s", compiler->colours.reset); + } + + if (reg->type == VKD3DSPR_FUNCTIONPOINTER) + shader_print_subscript(compiler, reg->u.fp_body_idx, NULL); + } + else + { + shader_addline(buffer, "%s", compiler->colours.reset); + } +} + +static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *precision; + + if (reg->precision == VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) + return; + + switch (reg->precision) + { + case VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16: + precision = "min16f"; + break; + + case VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10: + precision = "min2_8f"; + break; + + case VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16: + precision = "min16i"; + break; + + case VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16: + precision = "min16u"; + break; + + default: + vkd3d_string_buffer_printf(buffer, " {%s%s}", + compiler->colours.error, reg->precision, compiler->colours.reset); + return; + } + vkd3d_string_buffer_printf(buffer, " {%s%s%s}", compiler->colours.modifier, precision, compiler->colours.reset); +} + +static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) +{ + if (reg->non_uniform) + vkd3d_string_buffer_printf(&compiler->buffer, " {%snonuniform%s}", + compiler->colours.modifier, compiler->colours.reset); +} + +static void shader_dump_dst_param(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_dst_param *param, bool is_declaration) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + uint32_t write_mask = param->write_mask; + + 
shader_dump_register(compiler, ¶m->reg, is_declaration); + + if (write_mask) + { + static const char write_mask_chars[] = "xyzw"; + + if (param->reg.data_type == VKD3D_DATA_DOUBLE) + write_mask = vkd3d_write_mask_32_from_64(write_mask); + + shader_addline(buffer, ".%s", compiler->colours.write_mask); + if (write_mask & VKD3DSP_WRITEMASK_0) + shader_addline(buffer, "%c", write_mask_chars[0]); + if (write_mask & VKD3DSP_WRITEMASK_1) + shader_addline(buffer, "%c", write_mask_chars[1]); + if (write_mask & VKD3DSP_WRITEMASK_2) + shader_addline(buffer, "%c", write_mask_chars[2]); + if (write_mask & VKD3DSP_WRITEMASK_3) + shader_addline(buffer, "%c", write_mask_chars[3]); + shader_addline(buffer, "%s", compiler->colours.reset); + } + + shader_print_precision(compiler, ¶m->reg); + shader_print_non_uniform(compiler, ¶m->reg); +} + +static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_src_param *param) +{ + enum vkd3d_shader_src_modifier src_modifier = param->modifiers; + struct vkd3d_string_buffer *buffer = &compiler->buffer; + uint32_t swizzle = param->swizzle; + + if (src_modifier == VKD3DSPSM_NEG + || src_modifier == VKD3DSPSM_BIASNEG + || src_modifier == VKD3DSPSM_SIGNNEG + || src_modifier == VKD3DSPSM_X2NEG + || src_modifier == VKD3DSPSM_ABSNEG) + shader_addline(buffer, "-"); + else if (src_modifier == VKD3DSPSM_COMP) + shader_addline(buffer, "1-"); + else if (src_modifier == VKD3DSPSM_NOT) + shader_addline(buffer, "!"); + + if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) + shader_addline(buffer, "|"); + + shader_dump_register(compiler, ¶m->reg, false); + + switch (src_modifier) + { + case VKD3DSPSM_NONE: break; + case VKD3DSPSM_NEG: break; + case VKD3DSPSM_NOT: break; + case VKD3DSPSM_BIAS: shader_addline(buffer, "_bias"); break; + case VKD3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break; + case VKD3DSPSM_SIGN: shader_addline(buffer, "_bx2"); break; + case VKD3DSPSM_SIGNNEG: 
shader_addline(buffer, "_bx2"); break; + case VKD3DSPSM_COMP: break; + case VKD3DSPSM_X2: shader_addline(buffer, "_x2"); break; + case VKD3DSPSM_X2NEG: shader_addline(buffer, "_x2"); break; + case VKD3DSPSM_DZ: shader_addline(buffer, "_dz"); break; + case VKD3DSPSM_DW: shader_addline(buffer, "_dw"); break; + case VKD3DSPSM_ABSNEG: + case VKD3DSPSM_ABS: /* handled later */ break; + default: shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier); + } + + if (param->reg.type != VKD3DSPR_IMMCONST && param->reg.type != VKD3DSPR_IMMCONST64 + && param->reg.type != VKD3DSPR_SAMPLER) + { + unsigned int swizzle_x = vkd3d_swizzle_get_component(swizzle, 0); + unsigned int swizzle_y = vkd3d_swizzle_get_component(swizzle, 1); + unsigned int swizzle_z = vkd3d_swizzle_get_component(swizzle, 2); + unsigned int swizzle_w = vkd3d_swizzle_get_component(swizzle, 3); + + static const char swizzle_chars[] = "xyzw"; + + if (swizzle_x == swizzle_y + && swizzle_x == swizzle_z + && swizzle_x == swizzle_w) + { + shader_addline(buffer, ".%s%c%s", compiler->colours.swizzle, + swizzle_chars[swizzle_x], compiler->colours.reset); + } + else + { + shader_addline(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, + swizzle_chars[swizzle_x], swizzle_chars[swizzle_y], + swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset); + } + } + if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) + shader_addline(buffer, "|"); + + shader_print_precision(compiler, ¶m->reg); + shader_print_non_uniform(compiler, ¶m->reg); +} + +static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_dst_param *dst) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + uint32_t mmask = dst->modifiers; + + switch (dst->shift) + { + case 0: break; + case 13: shader_addline(buffer, "_d8"); break; + case 14: shader_addline(buffer, "_d4"); break; + case 15: shader_addline(buffer, "_d2"); break; + case 1: shader_addline(buffer, 
"_x2"); break; + case 2: shader_addline(buffer, "_x4"); break; + case 3: shader_addline(buffer, "_x8"); break; + default: shader_addline(buffer, "_unhandled_shift(%d)", dst->shift); break; + } + + if (mmask & VKD3DSPDM_SATURATE) shader_addline(buffer, "_sat"); + if (mmask & VKD3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp"); + if (mmask & VKD3DSPDM_MSAMPCENTROID) shader_addline(buffer, "_centroid"); + + mmask &= ~(VKD3DSPDM_SATURATE | VKD3DSPDM_PARTIALPRECISION | VKD3DSPDM_MSAMPCENTROID); + if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask); +} + +static void shader_dump_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_primitive_type *primitive_type) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + + switch (primitive_type->type) + { + case VKD3D_PT_UNDEFINED: + shader_addline(buffer, "undefined"); + break; + case VKD3D_PT_POINTLIST: + shader_addline(buffer, "pointlist"); + break; + case VKD3D_PT_LINELIST: + shader_addline(buffer, "linelist"); + break; + case VKD3D_PT_LINESTRIP: + shader_addline(buffer, "linestrip"); + break; + case VKD3D_PT_TRIANGLELIST: + shader_addline(buffer, "trianglelist"); + break; + case VKD3D_PT_TRIANGLESTRIP: + shader_addline(buffer, "trianglestrip"); + break; + case VKD3D_PT_TRIANGLEFAN: + shader_addline(buffer, "trianglefan"); + break; + case VKD3D_PT_LINELIST_ADJ: + shader_addline(buffer, "linelist_adj"); + break; + case VKD3D_PT_LINESTRIP_ADJ: + shader_addline(buffer, "linestrip_adj"); + break; + case VKD3D_PT_TRIANGLELIST_ADJ: + shader_addline(buffer, "trianglelist_adj"); + break; + case VKD3D_PT_TRIANGLESTRIP_ADJ: + shader_addline(buffer, "trianglestrip_adj"); + break; + case VKD3D_PT_PATCH: + shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count); + break; + default: + shader_addline(buffer, "", primitive_type->type); + break; + } +} + +static void shader_dump_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, + enum vkd3d_shader_interpolation_mode 
interpolation_mode) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + + switch (interpolation_mode) + { + case VKD3DSIM_CONSTANT: + shader_addline(buffer, "constant"); + break; + case VKD3DSIM_LINEAR: + shader_addline(buffer, "linear"); + break; + case VKD3DSIM_LINEAR_CENTROID: + shader_addline(buffer, "linear centroid"); + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE: + shader_addline(buffer, "linear noperspective"); + break; + case VKD3DSIM_LINEAR_SAMPLE: + shader_addline(buffer, "linear sample"); + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID: + shader_addline(buffer, "linear noperspective centroid"); + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: + shader_addline(buffer, "linear noperspective sample"); + break; + default: + shader_addline(buffer, "", interpolation_mode); + break; + } +} + +const char *shader_get_type_prefix(enum vkd3d_shader_type type) +{ + switch (type) + { + case VKD3D_SHADER_TYPE_VERTEX: + return "vs"; + + case VKD3D_SHADER_TYPE_HULL: + return "hs"; + + case VKD3D_SHADER_TYPE_DOMAIN: + return "ds"; + + case VKD3D_SHADER_TYPE_GEOMETRY: + return "gs"; + + case VKD3D_SHADER_TYPE_PIXEL: + return "ps"; + + case VKD3D_SHADER_TYPE_COMPUTE: + return "cs"; + + case VKD3D_SHADER_TYPE_EFFECT: + return "fx"; + + case VKD3D_SHADER_TYPE_TEXTURE: + return "tx"; + + case VKD3D_SHADER_TYPE_LIBRARY: + return "lib"; + + default: + FIXME("Unhandled shader type %#x.\n", type); + return "unknown"; + } +} + +static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + + switch (ins->handler_idx) + { + case VKD3DSIH_BREAKP: + case VKD3DSIH_CONTINUEP: + case VKD3DSIH_IF: + case VKD3DSIH_RETP: + case VKD3DSIH_TEXKILL: + switch (ins->flags) + { + case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break; + case VKD3D_SHADER_CONDITIONAL_OP_Z: shader_addline(buffer, "_z"); break; + default: 
shader_addline(buffer, "_unrecognized(%#x)", ins->flags); break; + } + break; + + case VKD3DSIH_IFC: + case VKD3DSIH_BREAKC: + switch (ins->flags) + { + case VKD3D_SHADER_REL_OP_GT: shader_addline(buffer, "_gt"); break; + case VKD3D_SHADER_REL_OP_EQ: shader_addline(buffer, "_eq"); break; + case VKD3D_SHADER_REL_OP_GE: shader_addline(buffer, "_ge"); break; + case VKD3D_SHADER_REL_OP_LT: shader_addline(buffer, "_lt"); break; + case VKD3D_SHADER_REL_OP_NE: shader_addline(buffer, "_ne"); break; + case VKD3D_SHADER_REL_OP_LE: shader_addline(buffer, "_le"); break; + default: shader_addline(buffer, "_(%u)", ins->flags); + } + break; + + case VKD3DSIH_RESINFO: + switch (ins->flags) + { + case VKD3DSI_NONE: break; + case VKD3DSI_RESINFO_RCP_FLOAT: shader_addline(buffer, "_rcpFloat"); break; + case VKD3DSI_RESINFO_UINT: shader_addline(buffer, "_uint"); break; + default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); + } + break; + + case VKD3DSIH_SAMPLE_INFO: + switch (ins->flags) + { + case VKD3DSI_NONE: break; + case VKD3DSI_SAMPLE_INFO_UINT: shader_addline(buffer, "_uint"); break; + default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); + } + break; + + case VKD3DSIH_SYNC: + shader_dump_sync_flags(compiler, ins->flags); + break; + + case VKD3DSIH_TEX: + if (shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) + shader_addline(buffer, "p"); + break; + + default: + shader_dump_precise_flags(compiler, ins->flags); + break; + } +} + +static void shader_dump_register_space(struct vkd3d_d3d_asm_compiler *compiler, unsigned int register_space) +{ + if (shader_ver_ge(&compiler->shader_version, 5, 1)) + shader_print_uint_literal(compiler, ", space=", register_space, ""); +} + +static void shader_print_opcode(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_opcode opcode) +{ + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", compiler->colours.opcode, + shader_opcode_names[opcode], compiler->colours.reset); 
+} + +static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + unsigned int i; + + if (ins->predicate) + { + vkd3d_string_buffer_printf(buffer, "("); + shader_dump_src_param(compiler, ins->predicate); + vkd3d_string_buffer_printf(buffer, ") "); + } + + /* PixWin marks instructions with the coissue flag with a '+' */ + if (ins->coissue) + vkd3d_string_buffer_printf(buffer, "+"); + + shader_print_opcode(compiler, ins->handler_idx); + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_UAV_TYPED: + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.opcode); + shader_dump_decl_usage(compiler, &ins->declaration.semantic, ins->flags); + shader_dump_ins_modifiers(compiler, &ins->declaration.semantic.resource.reg); + vkd3d_string_buffer_printf(buffer, "%s ", compiler->colours.reset); + shader_dump_register(compiler, &ins->declaration.semantic.resource.reg.reg, true); + shader_dump_register_space(compiler, ins->declaration.semantic.resource.range.space); + break; + + case VKD3DSIH_DCL_CONSTANT_BUFFER: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_register(compiler, &ins->declaration.cb.src.reg, true); + if (shader_ver_ge(&compiler->shader_version, 5, 1)) + shader_print_subscript(compiler, ins->declaration.cb.size, NULL); + shader_addline(buffer, ", %s", + ins->flags & VKD3DSI_INDEXED_DYNAMIC ? 
"dynamicIndexed" : "immediateIndexed"); + shader_dump_register_space(compiler, ins->declaration.cb.range.space); + break; + + case VKD3DSIH_DCL_FUNCTION_BODY: + vkd3d_string_buffer_printf(buffer, " fb%u", ins->declaration.index); + break; + + case VKD3DSIH_DCL_FUNCTION_TABLE: + vkd3d_string_buffer_printf(buffer, " ft%u = {...}", ins->declaration.index); + break; + + case VKD3DSIH_DCL_GLOBAL_FLAGS: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_global_flags(compiler, ins->flags); + break; + + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + shader_print_float_literal(compiler, " ", ins->declaration.max_tessellation_factor, ""); + break; + + case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: + vkd3d_string_buffer_printf(buffer, " {\n"); + for (i = 0; i < ins->declaration.icb->vec4_count; ++i) + { + shader_print_hex_literal(compiler, " {", ins->declaration.icb->data[4 * i + 0], ""); + shader_print_hex_literal(compiler, ", ", ins->declaration.icb->data[4 * i + 1], ""); + shader_print_hex_literal(compiler, ", ", ins->declaration.icb->data[4 * i + 2], ""); + shader_print_hex_literal(compiler, ", ", ins->declaration.icb->data[4 * i + 3], "},\n"); + } + shader_addline(buffer, "}"); + break; + + case VKD3DSIH_DCL_INDEX_RANGE: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.index_range.dst, true); + shader_print_uint_literal(compiler, " ", ins->declaration.index_range.register_count, ""); + break; + + case VKD3DSIH_DCL_INDEXABLE_TEMP: + vkd3d_string_buffer_printf(buffer, " %sx%u%s", compiler->colours.reg, + ins->declaration.indexable_temp.register_idx, compiler->colours.reset); + shader_print_subscript(compiler, ins->declaration.indexable_temp.register_size, NULL); + shader_print_uint_literal(compiler, ", ", ins->declaration.indexable_temp.component_count, ""); + break; + + case VKD3DSIH_DCL_INPUT_PS: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_interpolation_mode(compiler, ins->flags); + shader_addline(buffer, " "); + 
shader_dump_dst_param(compiler, &ins->declaration.dst, true); + break; + + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_OUTPUT_SIV: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); + shader_addline(buffer, ", "); + shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); + break; + + case VKD3DSIH_DCL_INPUT_PS_SIV: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_interpolation_mode(compiler, ins->flags); + shader_addline(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); + shader_addline(buffer, ", "); + shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); + break; + + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_OUTPUT: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.dst, true); + break; + + case VKD3DSIH_DCL_INPUT_PRIMITIVE: + case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_primitive_type(compiler, &ins->declaration.primitive_type); + break; + + case VKD3DSIH_DCL_INTERFACE: + vkd3d_string_buffer_printf(buffer, " fp%u", ins->declaration.fp.index); + shader_print_subscript(compiler, ins->declaration.fp.array_size, NULL); + shader_print_subscript(compiler, ins->declaration.fp.body_count, NULL); + vkd3d_string_buffer_printf(buffer, " = {...}"); + break; + + case VKD3DSIH_DCL_RESOURCE_RAW: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); + shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); + break; + + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, 
true); + shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); + shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); + break; + + case VKD3DSIH_DCL_SAMPLER: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_register(compiler, &ins->declaration.sampler.src.reg, true); + if (ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE) + shader_addline(buffer, ", comparisonMode"); + shader_dump_register_space(compiler, ins->declaration.sampler.range.space); + break; + + case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_DCL_GS_INSTANCES: + case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + case VKD3DSIH_DCL_VERTICES_OUT: + shader_print_uint_literal(compiler, " ", ins->declaration.count, ""); + break; + + case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_tessellator_domain(compiler, ins->declaration.tessellator_domain); + break; + + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_tessellator_output_primitive(compiler, ins->declaration.tessellator_output_primitive); + break; + + case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_tessellator_partitioning(compiler, ins->declaration.tessellator_partitioning); + break; + + case VKD3DSIH_DCL_TGSM_RAW: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.tgsm_raw.reg, true); + shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_raw.byte_count, ""); + break; + + case VKD3DSIH_DCL_TGSM_STRUCTURED: + vkd3d_string_buffer_printf(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.tgsm_structured.reg, true); + shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.byte_stride, ""); + 
shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.structure_count, ""); + break; + + case VKD3DSIH_DCL_THREAD_GROUP: + shader_print_uint_literal(compiler, " ", ins->declaration.thread_group_size.x, ""); + shader_print_uint_literal(compiler, ", ", ins->declaration.thread_group_size.y, ""); + shader_print_uint_literal(compiler, ", ", ins->declaration.thread_group_size.z, ""); + break; + + case VKD3DSIH_DCL_UAV_RAW: + shader_dump_uav_flags(compiler, ins->flags); + shader_addline(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); + shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); + break; + + case VKD3DSIH_DCL_UAV_STRUCTURED: + shader_dump_uav_flags(compiler, ins->flags); + shader_addline(buffer, " "); + shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); + shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); + shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); + break; + + case VKD3DSIH_DEF: + vkd3d_string_buffer_printf(buffer, " %sc%u%s", compiler->colours.reg, + shader_get_float_offset(ins->dst[0].reg.type, ins->dst[0].reg.idx[0].offset), + compiler->colours.reset); + shader_print_float_literal(compiler, " = ", ins->src[0].reg.u.immconst_float[0], ""); + shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_float[1], ""); + shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_float[2], ""); + shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_float[3], ""); + break; + + case VKD3DSIH_DEFI: + vkd3d_string_buffer_printf(buffer, " %si%u%s", compiler->colours.reg, + ins->dst[0].reg.idx[0].offset, compiler->colours.reset); + shader_print_int_literal(compiler, " = ", ins->src[0].reg.u.immconst_uint[0], ""); + shader_print_int_literal(compiler, ", ", 
ins->src[0].reg.u.immconst_uint[1], ""); + shader_print_int_literal(compiler, ", ", ins->src[0].reg.u.immconst_uint[2], ""); + shader_print_int_literal(compiler, ", ", ins->src[0].reg.u.immconst_uint[3], ""); + break; + + case VKD3DSIH_DEFB: + vkd3d_string_buffer_printf(buffer, " %sb%u%s", compiler->colours.reg, + ins->dst[0].reg.idx[0].offset, compiler->colours.reset); + shader_print_bool_literal(compiler, " = ", ins->src[0].reg.u.immconst_uint[0], ""); + break; + + default: + shader_dump_instruction_flags(compiler, ins); + + if (ins->resource_type != VKD3D_SHADER_RESOURCE_NONE) + { + shader_addline(buffer, "_indexable("); + if (ins->raw) + vkd3d_string_buffer_printf(buffer, "raw_"); + if (ins->structured) + vkd3d_string_buffer_printf(buffer, "structured_"); + shader_dump_resource_type(compiler, ins->resource_type); + if (ins->resource_stride) + shader_print_uint_literal(compiler, ", stride=", ins->resource_stride, ""); + shader_addline(buffer, ")"); + } + + if (vkd3d_shader_instruction_has_texel_offset(ins)) + { + shader_print_int_literal(compiler, "(", ins->texel_offset.u, ""); + shader_print_int_literal(compiler, ",", ins->texel_offset.v, ""); + shader_print_int_literal(compiler, ",", ins->texel_offset.w, ")"); + } + + if (ins->resource_data_type[0] != VKD3D_DATA_FLOAT + || ins->resource_data_type[1] != VKD3D_DATA_FLOAT + || ins->resource_data_type[2] != VKD3D_DATA_FLOAT + || ins->resource_data_type[3] != VKD3D_DATA_FLOAT) + shader_dump_data_type(compiler, ins->resource_data_type); + + for (i = 0; i < ins->dst_count; ++i) + { + shader_dump_ins_modifiers(compiler, &ins->dst[i]); + shader_addline(buffer, !i ? " " : ", "); + shader_dump_dst_param(compiler, &ins->dst[i], false); + } + + /* Other source tokens */ + for (i = ins->dst_count; i < (ins->dst_count + ins->src_count); ++i) + { + shader_addline(buffer, !i ? 
" " : ", "); + shader_dump_src_param(compiler, &ins->src[i - ins->dst_count]); + } + break; + } + + shader_addline(buffer, "\n"); +} + +enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out) +{ + enum vkd3d_shader_compile_option_formatting_flags formatting; + struct vkd3d_shader_version *shader_version; + struct vkd3d_d3d_asm_compiler compiler; + enum vkd3d_result result = VKD3D_OK; + struct vkd3d_string_buffer *buffer; + unsigned int indent, i; + const char *indent_str; + void *code; + + static const struct vkd3d_d3d_asm_colours no_colours = + { + .reset = "", + .error = "", + .literal = "", + .modifier = "", + .opcode = "", + .reg = "", + .swizzle = "", + .version = "", + .write_mask = "", + }; + static const struct vkd3d_d3d_asm_colours colours = + { + .reset = "\x1b[m", + .error = "\x1b[97;41m", + .literal = "\x1b[95m", + .modifier = "\x1b[36m", + .opcode = "\x1b[96;1m", + .reg = "\x1b[96m", + .swizzle = "\x1b[93m", + .version = "\x1b[36m", + .write_mask = "\x1b[93m", + }; + + formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT + | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; + if (compile_info) + { + for (i = 0; i < compile_info->option_count; ++i) + { + const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; + + if (option->name == VKD3D_SHADER_COMPILE_OPTION_FORMATTING) + formatting = option->value; + } + } + + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_COLOUR) + compiler.colours = colours; + else + compiler.colours = no_colours; + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT) + indent_str = " "; + else + indent_str = ""; + + buffer = &compiler.buffer; + vkd3d_string_buffer_init(buffer); + + shader_version = &compiler.shader_version; + *shader_version = parser->shader_version; + vkd3d_string_buffer_printf(buffer, "%s%s_%u_%u%s\n", compiler.colours.version, + 
shader_get_type_prefix(shader_version->type), shader_version->major, + shader_version->minor, compiler.colours.reset); + + indent = 0; + vkd3d_shader_parser_reset(parser); + while (!vkd3d_shader_parser_is_end(parser)) + { + struct vkd3d_shader_instruction ins; + + vkd3d_shader_parser_read_instruction(parser, &ins); + if (ins.handler_idx == VKD3DSIH_INVALID) + { + WARN("Skipping unrecognized instruction.\n"); + vkd3d_string_buffer_printf(buffer, "\n"); + result = VKD3D_ERROR; + continue; + } + + switch (ins.handler_idx) + { + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: + case VKD3DSIH_ENDLOOP: + case VKD3DSIH_ENDSWITCH: + --indent; + break; + + default: + break; + } + + for (i = 0; i < indent; ++i) + { + vkd3d_string_buffer_printf(buffer, "%s", indent_str); + } + + shader_dump_instruction(&compiler, &ins); + + switch (ins.handler_idx) + { + case VKD3DSIH_ELSE: + case VKD3DSIH_IF: + case VKD3DSIH_LOOP: + case VKD3DSIH_SWITCH: + ++indent; + break; + + default: + break; + } + } + + if (parser->failed) + result = VKD3D_ERROR_INVALID_SHADER; + + if ((code = vkd3d_malloc(buffer->content_size))) + { + memcpy(code, buffer->buffer, buffer->content_size); + out->size = buffer->content_size; + out->code = code; + } + else + { + result = VKD3D_ERROR_OUT_OF_MEMORY; + } + + vkd3d_string_buffer_cleanup(buffer); + + return result; +} + +void vkd3d_shader_trace(struct vkd3d_shader_parser *parser) +{ + const char *p, *q, *end; + struct vkd3d_shader_code code; + + if (vkd3d_dxbc_binary_to_text(parser, NULL, &code) != VKD3D_OK) + return; + + end = (const char *)code.code + code.size; + for (p = code.code; p < end; p = q) + { + if (!(q = memchr(p, '\n', end - p))) + q = end; + else + ++q; + TRACE(" %.*s", (int)(q - p), p); + } + + vkd3d_shader_free_shader_code(&code); +} diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c new file mode 100644 index 00000000000..a5fc1e003ee --- /dev/null +++ 
b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -0,0 +1,1518 @@ +/* + * Copyright 2017 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" +#include "vkd3d_version.h" + +#include +#include + +void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer) +{ + buffer->buffer_size = 16; + buffer->content_size = 0; + buffer->buffer = vkd3d_malloc(buffer->buffer_size); + assert(buffer->buffer); + memset(buffer->buffer, 0, buffer->buffer_size); +} + +void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer) +{ + vkd3d_free(buffer->buffer); +} + +static void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer) +{ + buffer->buffer[0] = '\0'; + buffer->content_size = 0; +} + +static bool vkd3d_string_buffer_resize(struct vkd3d_string_buffer *buffer, int rc) +{ + unsigned int new_buffer_size = rc >= 0 ? 
buffer->content_size + rc + 1 : buffer->buffer_size * 2; + + if (!vkd3d_array_reserve((void **)&buffer->buffer, &buffer->buffer_size, new_buffer_size, 1)) + { + ERR("Failed to grow buffer.\n"); + buffer->buffer[buffer->content_size] = '\0'; + return false; + } + return true; +} + +int vkd3d_string_buffer_vprintf(struct vkd3d_string_buffer *buffer, const char *format, va_list args) +{ + unsigned int rem; + va_list a; + int rc; + + for (;;) + { + rem = buffer->buffer_size - buffer->content_size; + va_copy(a, args); + rc = vsnprintf(&buffer->buffer[buffer->content_size], rem, format, a); + va_end(a); + if (rc >= 0 && (unsigned int)rc < rem) + { + buffer->content_size += rc; + return 0; + } + + if (!vkd3d_string_buffer_resize(buffer, rc)) + return -1; + } +} + +int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) +{ + va_list args; + int ret; + + va_start(args, format); + ret = vkd3d_string_buffer_vprintf(buffer, format, args); + va_end(args); + + return ret; +} + +int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f) +{ + unsigned int idx = buffer->content_size + 1; + int ret; + + if (!(ret = vkd3d_string_buffer_printf(buffer, "%.8e", f)) && isfinite(f)) + { + if (signbit(f)) + ++idx; + buffer->buffer[idx] = '.'; + } + + return ret; +} + +int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d) +{ + unsigned int idx = buffer->content_size + 1; + int ret; + + if (!(ret = vkd3d_string_buffer_printf(buffer, "%.16e", d)) && isfinite(d)) + { + if (signbit(d)) + ++idx; + buffer->buffer[idx] = '.'; + } + + return ret; +} + +void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function) +{ + vkd3d_shader_trace_text_(buffer->buffer, buffer->content_size, function); +} + +void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function) +{ + const char *p, *q, *end = text + size; + + if (!TRACE_ON()) + return; + + for (p = text; p < end; p = 
q) + { + if (!(q = memchr(p, '\n', end - p))) + q = end; + else + ++q; + vkd3d_dbg_printf(VKD3D_DBG_LEVEL_TRACE, function, "%.*s", (int)(q - p), p); + } +} + +void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *cache) +{ + memset(cache, 0, sizeof(*cache)); +} + +void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *cache) +{ + unsigned int i; + + for (i = 0; i < cache->count; ++i) + { + vkd3d_string_buffer_cleanup(cache->buffers[i]); + vkd3d_free(cache->buffers[i]); + } + vkd3d_free(cache->buffers); + vkd3d_string_buffer_cache_init(cache); +} + +struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *cache) +{ + struct vkd3d_string_buffer *buffer; + + if (!cache->count) + { + if (!vkd3d_array_reserve((void **)&cache->buffers, &cache->capacity, + cache->max_count + 1, sizeof(*cache->buffers))) + return NULL; + ++cache->max_count; + + if (!(buffer = vkd3d_malloc(sizeof(*buffer)))) + return NULL; + vkd3d_string_buffer_init(buffer); + } + else + { + buffer = cache->buffers[--cache->count]; + } + vkd3d_string_buffer_clear(buffer); + return buffer; +} + +void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct vkd3d_string_buffer *buffer) +{ + if (!buffer) + return; + assert(cache->count + 1 <= cache->max_count); + cache->buffers[cache->count++] = buffer; +} + +void vkd3d_shader_message_context_init(struct vkd3d_shader_message_context *context, + enum vkd3d_shader_log_level log_level) +{ + context->log_level = log_level; + vkd3d_string_buffer_init(&context->messages); +} + +void vkd3d_shader_message_context_cleanup(struct vkd3d_shader_message_context *context) +{ + vkd3d_string_buffer_cleanup(&context->messages); +} + +void vkd3d_shader_message_context_trace_messages_(const struct vkd3d_shader_message_context *context, + const char *function) +{ + vkd3d_string_buffer_trace_(&context->messages, function); +} + +bool vkd3d_shader_message_context_copy_messages(struct 
vkd3d_shader_message_context *context, char **out) +{ + char *messages; + + if (!out) + return true; + + *out = NULL; + + if (!context->messages.content_size) + return true; + + if (!(messages = vkd3d_malloc(context->messages.content_size + 1))) + return false; + memcpy(messages, context->messages.buffer, context->messages.content_size + 1); + *out = messages; + return true; +} + +void vkd3d_shader_vnote(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_log_level level, const char *format, va_list args) +{ + if (context->log_level < level) + return; + + if (location) + { + const char *source_name = location->source_name ? location->source_name : ""; + + if (location->line) + vkd3d_string_buffer_printf(&context->messages, "%s:%u:%u: ", + source_name, location->line, location->column); + else + vkd3d_string_buffer_printf(&context->messages, "%s: ", source_name); + } + vkd3d_string_buffer_vprintf(&context->messages, format, args); + vkd3d_string_buffer_printf(&context->messages, "\n"); +} + +void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *format, va_list args) +{ + if (context->log_level < VKD3D_SHADER_LOG_WARNING) + return; + + if (location) + { + const char *source_name = location->source_name ? 
location->source_name : ""; + + if (location->line) + vkd3d_string_buffer_printf(&context->messages, "%s:%u:%u: W%04u: ", + source_name, location->line, location->column, error); + else + vkd3d_string_buffer_printf(&context->messages, "%s: W%04u: ", source_name, error); + } + else + { + vkd3d_string_buffer_printf(&context->messages, "W%04u: ", error); + } + vkd3d_string_buffer_vprintf(&context->messages, format, args); + vkd3d_string_buffer_printf(&context->messages, "\n"); +} + +void vkd3d_shader_verror(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *format, va_list args) +{ + if (context->log_level < VKD3D_SHADER_LOG_ERROR) + return; + + if (location) + { + const char *source_name = location->source_name ? location->source_name : ""; + + if (location->line) + vkd3d_string_buffer_printf(&context->messages, "%s:%u:%u: E%04u: ", + source_name, location->line, location->column, error); + else + vkd3d_string_buffer_printf(&context->messages, "%s: E%04u: ", source_name, error); + } + else + { + vkd3d_string_buffer_printf(&context->messages, "E%04u: ", error); + } + vkd3d_string_buffer_vprintf(&context->messages, format, args); + vkd3d_string_buffer_printf(&context->messages, "\n"); +} + +void vkd3d_shader_error(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *format, ...) 
+{ + va_list args; + + va_start(args, format); + vkd3d_shader_verror(context, location, error, format, args); + va_end(args); +} + +size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size) +{ + size_t aligned_size = align(size, 4); + size_t offset = buffer->size; + + if (buffer->status) + return offset; + + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + aligned_size, 1)) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return offset; + } + memcpy(buffer->data + offset, bytes, size); + memset(buffer->data + offset + size, 0xab, aligned_size - size); + buffer->size = offset + aligned_size; + return offset; +} + +void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value) +{ + if (buffer->status) + return; + + assert(vkd3d_bound_range(offset, sizeof(value), buffer->size)); + memcpy(buffer->data + offset, &value, sizeof(value)); +} + +static void vkd3d_shader_dump_blob(const char *path, const char *prefix, + const char *suffix, const void *data, size_t size) +{ + static LONG shader_id = 0; + char filename[1024]; + unsigned int id; + FILE *f; + + id = InterlockedIncrement(&shader_id) - 1; + + snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%s-%u.%s", path, prefix, id, suffix); + if ((f = fopen(filename, "wb"))) + { + if (fwrite(data, 1, size, f) != size) + ERR("Failed to write shader to %s.\n", filename); + if (fclose(f)) + ERR("Failed to close stream %s.\n", filename); + } + else + { + ERR("Failed to open %s for dumping shader.\n", filename); + } +} + +static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type type) +{ + switch (type) + { + case VKD3D_SHADER_SOURCE_DXBC_TPF: + return "dxbc"; + case VKD3D_SHADER_SOURCE_HLSL: + return "hlsl"; + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + return "d3dbc"; + default: + FIXME("Unhandled source type %#x.\n", type); + return "bin"; + } +} + +void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type 
source_type, + enum vkd3d_shader_type shader_type, const struct vkd3d_shader_code *shader) +{ + static bool enabled = true; + const char *path; + + if (!enabled) + return; + + if (!(path = getenv("VKD3D_SHADER_DUMP_PATH"))) + { + enabled = false; + return; + } + + vkd3d_shader_dump_blob(path, shader_get_type_prefix(shader_type), + shader_get_source_type_suffix(source_type), shader->code, shader->size); +} + +void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, + struct vkd3d_shader_message_context *message_context, const char *source_name, + const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops) +{ + parser->message_context = message_context; + parser->location.source_name = source_name; + parser->location.line = 1; + parser->location.column = 0; + parser->shader_version = *version; + parser->ops = ops; +} + +void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) +{ + va_list args; + + va_start(args, format); + vkd3d_shader_verror(parser->message_context, &parser->location, error, format, args); + va_end(args); + + parser->failed = true; +} + +void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) 
+{ + va_list args; + + va_start(args, format); + vkd3d_shader_vwarning(parser->message_context, &parser->location, error, format, args); + va_end(args); +} + +static int vkd3d_shader_validate_compile_info(const struct vkd3d_shader_compile_info *compile_info, + bool validate_target_type) +{ + const enum vkd3d_shader_source_type *source_types; + const enum vkd3d_shader_target_type *target_types; + unsigned int count, i; + + if (compile_info->type != VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO) + { + WARN("Invalid structure type %#x.\n", compile_info->type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + source_types = vkd3d_shader_get_supported_source_types(&count); + for (i = 0; i < count; ++i) + { + if (source_types[i] == compile_info->source_type) + break; + } + if (i == count) + { + WARN("Invalid shader source type %#x.\n", compile_info->source_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if (validate_target_type) + { + target_types = vkd3d_shader_get_supported_target_types(compile_info->source_type, &count); + for (i = 0; i < count; ++i) + { + if (target_types[i] == compile_info->target_type) + break; + } + if (i == count) + { + WARN("Invalid shader target type %#x.\n", compile_info->target_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + } + + return VKD3D_OK; +} + +void vkd3d_shader_free_messages(char *messages) +{ + TRACE("messages %p.\n", messages); + + vkd3d_free(messages); +} + +struct vkd3d_shader_scan_context +{ + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + size_t descriptors_size; + + struct vkd3d_shader_message_context *message_context; + struct vkd3d_shader_location location; + + struct vkd3d_shader_cf_info + { + enum + { + VKD3D_SHADER_BLOCK_IF, + VKD3D_SHADER_BLOCK_LOOP, + VKD3D_SHADER_BLOCK_SWITCH, + } type; + bool inside_block; + bool has_default; + } *cf_info; + size_t cf_info_size; + size_t cf_info_count; + + struct + { + unsigned int id; + unsigned int descriptor_idx; + } *uav_ranges; + size_t uav_ranges_size; + 
size_t uav_range_count; + + enum vkd3d_shader_api_version api_version; +}; + +static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context) +{ + unsigned int i; + + memset(context, 0, sizeof(*context)); + context->scan_descriptor_info = scan_descriptor_info; + context->message_context = message_context; + context->location.source_name = compile_info->source_name; + context->location.line = 2; /* Line 1 is the version token. */ + context->api_version = VKD3D_SHADER_API_VERSION_1_2; + + for (i = 0; i < compile_info->option_count; ++i) + { + const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; + + if (option->name == VKD3D_SHADER_COMPILE_OPTION_API_VERSION) + context->api_version = option->value; + } +} + +static void vkd3d_shader_scan_context_cleanup(struct vkd3d_shader_scan_context *context) +{ + vkd3d_free(context->uav_ranges); + vkd3d_free(context->cf_info); +} + +static struct vkd3d_shader_cf_info *vkd3d_shader_scan_get_current_cf_info(struct vkd3d_shader_scan_context *context) +{ + if (!context->cf_info_count) + return NULL; + return &context->cf_info[context->cf_info_count - 1]; +} + +static struct vkd3d_shader_cf_info *vkd3d_shader_scan_push_cf_info(struct vkd3d_shader_scan_context *context) +{ + struct vkd3d_shader_cf_info *cf_info; + + if (!vkd3d_array_reserve((void **)&context->cf_info, &context->cf_info_size, + context->cf_info_count + 1, sizeof(*context->cf_info))) + { + ERR("Failed to allocate UAV range.\n"); + return false; + } + + cf_info = &context->cf_info[context->cf_info_count++]; + memset(cf_info, 0, sizeof(*cf_info)); + + return cf_info; +} + +static void vkd3d_shader_scan_pop_cf_info(struct vkd3d_shader_scan_context *context) +{ + assert(context->cf_info_count); + + --context->cf_info_count; +} + +static struct 
vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_breakable_cf_info( + struct vkd3d_shader_scan_context *context) +{ + size_t count = context->cf_info_count; + struct vkd3d_shader_cf_info *cf_info; + + while (count) + { + cf_info = &context->cf_info[--count]; + if (cf_info->type == VKD3D_SHADER_BLOCK_LOOP + || cf_info->type == VKD3D_SHADER_BLOCK_SWITCH) + return cf_info; + } + + return NULL; +} + +static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_info( + struct vkd3d_shader_scan_context *context) +{ + size_t count = context->cf_info_count; + struct vkd3d_shader_cf_info *cf_info; + + while (count) + { + cf_info = &context->cf_info[--count]; + if (cf_info->type == VKD3D_SHADER_BLOCK_LOOP) + return cf_info; + } + + return NULL; +} + +static struct vkd3d_shader_descriptor_info *vkd3d_shader_scan_get_uav_descriptor_info( + const struct vkd3d_shader_scan_context *context, unsigned int range_id) +{ + unsigned int i; + + for (i = 0; i < context->uav_range_count; ++i) + { + if (context->uav_ranges[i].id == range_id) + return &context->scan_descriptor_info->descriptors[context->uav_ranges[i].descriptor_idx]; + } + + return NULL; +} + +static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; + return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) + || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR) + || handler_idx == VKD3DSIH_LD_UAV_TYPED + || (handler_idx == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) + || (handler_idx == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); +} + +static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) +{ + struct vkd3d_shader_descriptor_info *d; + + if (!context->scan_descriptor_info) + return; + + d = 
vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); + d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ; +} + +static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; + return handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC + || handler_idx == VKD3DSIH_IMM_ATOMIC_CONSUME; +} + +static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) +{ + struct vkd3d_shader_descriptor_info *d; + + if (!context->scan_descriptor_info) + return; + + d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); + d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER; +} + +static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, + enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range, + enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_resource_data_type resource_data_type, + unsigned int flags) +{ + struct vkd3d_shader_scan_descriptor_info *info = context->scan_descriptor_info; + struct vkd3d_shader_descriptor_info *d; + + if (!vkd3d_array_reserve((void **)&info->descriptors, &context->descriptors_size, + info->descriptor_count + 1, sizeof(*info->descriptors))) + { + ERR("Failed to allocate descriptor info.\n"); + return false; + } + + d = &info->descriptors[info->descriptor_count]; + d->type = type; + d->register_space = range->space; + d->register_index = range->first; + d->resource_type = resource_type; + d->resource_data_type = resource_data_type; + d->flags = flags; + d->count = (range->last == ~0u) ? 
~0u : range->last - range->first + 1; + ++info->descriptor_count; + + return true; +} + +static bool vkd3d_shader_scan_add_uav_range(struct vkd3d_shader_scan_context *context, + unsigned int id, unsigned int descriptor_idx) +{ + if (!vkd3d_array_reserve((void **)&context->uav_ranges, &context->uav_ranges_size, + context->uav_range_count + 1, sizeof(*context->uav_ranges))) + { + ERR("Failed to allocate UAV range.\n"); + return false; + } + + context->uav_ranges[context->uav_range_count].id = id; + context->uav_ranges[context->uav_range_count].descriptor_idx = descriptor_idx; + ++context->uav_range_count; + + return true; +} + +static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; + + if (!context->scan_descriptor_info) + return; + + vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->range, + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); +} + +static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; + unsigned int flags; + + if (!context->scan_descriptor_info) + return; + + if (instruction->flags & VKD3DSI_SAMPLER_COMPARISON_MODE) + flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; + else + flags = 0; + vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->range, + VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); +} + +static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, + enum vkd3d_shader_resource_data_type resource_data_type) +{ + enum 
vkd3d_shader_descriptor_type type; + + if (!context->scan_descriptor_info) + return; + + if (resource->reg.reg.type == VKD3DSPR_UAV) + type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + else + type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; + vkd3d_shader_scan_add_descriptor(context, type, &resource->range, resource_type, resource_data_type, 0); + if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + vkd3d_shader_scan_add_uav_range(context, resource->reg.reg.idx[0].offset, + context->scan_descriptor_info->descriptor_count - 1); +} + +static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; + enum vkd3d_shader_resource_data_type resource_data_type; + + if (semantic->resource_data_type[0] != semantic->resource_data_type[1] || + semantic->resource_data_type[0] != semantic->resource_data_type[2] || + semantic->resource_data_type[0] != semantic->resource_data_type[3]) + FIXME("Resource data types are different (%d, %d, %d, %d).\n", + semantic->resource_data_type[0], + semantic->resource_data_type[1], + semantic->resource_data_type[2], + semantic->resource_data_type[3]); + + switch (semantic->resource_data_type[0]) + { + case VKD3D_DATA_UNORM: + resource_data_type = VKD3D_SHADER_RESOURCE_DATA_UNORM; + break; + case VKD3D_DATA_SNORM: + resource_data_type = VKD3D_SHADER_RESOURCE_DATA_SNORM; + break; + case VKD3D_DATA_INT: + resource_data_type = VKD3D_SHADER_RESOURCE_DATA_INT; + break; + case VKD3D_DATA_UINT: + resource_data_type = VKD3D_SHADER_RESOURCE_DATA_UINT; + break; + case VKD3D_DATA_FLOAT: + resource_data_type = VKD3D_SHADER_RESOURCE_DATA_FLOAT; + break; + case VKD3D_DATA_MIXED: + resource_data_type = VKD3D_SHADER_RESOURCE_DATA_MIXED; + break; + case VKD3D_DATA_DOUBLE: + resource_data_type = VKD3D_SHADER_RESOURCE_DATA_DOUBLE; + break; + case VKD3D_DATA_CONTINUED: + resource_data_type = 
VKD3D_SHADER_RESOURCE_DATA_CONTINUED; + break; + default: + ERR("Invalid resource data type %#x.\n", semantic->resource_data_type[0]); + resource_data_type = VKD3D_SHADER_RESOURCE_DATA_FLOAT; + break; + } + + if (context->api_version < VKD3D_SHADER_API_VERSION_1_3 + && resource_data_type >= VKD3D_SHADER_RESOURCE_DATA_MIXED) + { + ERR("Invalid resource data type %#x for API version %#x.\n", + semantic->resource_data_type[0], context->api_version); + resource_data_type = VKD3D_SHADER_RESOURCE_DATA_FLOAT; + } + + vkd3d_shader_scan_resource_declaration(context, &semantic->resource, + semantic->resource_type, resource_data_type); +} + +static void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, + enum vkd3d_shader_error error, const char *format, ...) +{ + va_list args; + + va_start(args, format); + vkd3d_shader_verror(context->message_context, &context->location, error, format, args); + va_end(args); +} + +static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_shader_cf_info *cf_info; + unsigned int i; + + switch (instruction->handler_idx) + { + case VKD3DSIH_DCL_CONSTANT_BUFFER: + vkd3d_shader_scan_constant_buffer_declaration(context, instruction); + break; + case VKD3DSIH_DCL_SAMPLER: + vkd3d_shader_scan_sampler_declaration(context, instruction); + break; + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_UAV_TYPED: + vkd3d_shader_scan_typed_resource_declaration(context, instruction); + break; + case VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_UAV_RAW: + vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.raw_resource.resource, + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); + break; + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + case VKD3DSIH_DCL_UAV_STRUCTURED: + vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.structured_resource.resource, + VKD3D_SHADER_RESOURCE_BUFFER, 
VKD3D_SHADER_RESOURCE_DATA_UINT); + break; + case VKD3DSIH_IF: + cf_info = vkd3d_shader_scan_push_cf_info(context); + cf_info->type = VKD3D_SHADER_BLOCK_IF; + cf_info->inside_block = true; + break; + case VKD3DSIH_ELSE: + if (!(cf_info = vkd3d_shader_scan_get_current_cf_info(context)) || cf_info->type != VKD3D_SHADER_BLOCK_IF) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘else’ instruction without corresponding ‘if’ block."); + return VKD3D_ERROR_INVALID_SHADER; + } + cf_info->inside_block = true; + break; + case VKD3DSIH_ENDIF: + if (!(cf_info = vkd3d_shader_scan_get_current_cf_info(context)) || cf_info->type != VKD3D_SHADER_BLOCK_IF) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘endif’ instruction without corresponding ‘if’ block."); + return VKD3D_ERROR_INVALID_SHADER; + } + vkd3d_shader_scan_pop_cf_info(context); + break; + case VKD3DSIH_LOOP: + cf_info = vkd3d_shader_scan_push_cf_info(context); + cf_info->type = VKD3D_SHADER_BLOCK_LOOP; + cf_info->inside_block = true; + break; + case VKD3DSIH_ENDLOOP: + if (!(cf_info = vkd3d_shader_scan_get_current_cf_info(context)) || cf_info->type != VKD3D_SHADER_BLOCK_LOOP) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘endloop’ instruction without corresponding ‘loop’ block."); + return VKD3D_ERROR_INVALID_SHADER; + } + vkd3d_shader_scan_pop_cf_info(context); + break; + case VKD3DSIH_SWITCH: + cf_info = vkd3d_shader_scan_push_cf_info(context); + cf_info->type = VKD3D_SHADER_BLOCK_SWITCH; + break; + case VKD3DSIH_ENDSWITCH: + if (!(cf_info = vkd3d_shader_scan_get_current_cf_info(context)) + || cf_info->type != VKD3D_SHADER_BLOCK_SWITCH || cf_info->inside_block) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘endswitch’ instruction without corresponding ‘switch’ block."); + return VKD3D_ERROR_INVALID_SHADER; + } + 
vkd3d_shader_scan_pop_cf_info(context); + break; + case VKD3DSIH_CASE: + if (!(cf_info = vkd3d_shader_scan_get_current_cf_info(context)) + || cf_info->type != VKD3D_SHADER_BLOCK_SWITCH) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘case’ instruction outside switch block."); + return VKD3D_ERROR_INVALID_SHADER; + } + cf_info->inside_block = true; + break; + case VKD3DSIH_DEFAULT: + if (!(cf_info = vkd3d_shader_scan_get_current_cf_info(context)) + || cf_info->type != VKD3D_SHADER_BLOCK_SWITCH) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘default’ instruction outside switch block."); + return VKD3D_ERROR_INVALID_SHADER; + } + if (cf_info->has_default) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered duplicate ‘default’ instruction inside the current switch block."); + return VKD3D_ERROR_INVALID_SHADER; + } + cf_info->inside_block = true; + cf_info->has_default = true; + break; + case VKD3DSIH_BREAK: + if (!(cf_info = vkd3d_shader_scan_find_innermost_breakable_cf_info(context))) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘break’ instruction outside breakable block."); + return VKD3D_ERROR_INVALID_SHADER; + } + cf_info->inside_block = false; + break; + case VKD3DSIH_BREAKP: + if (!(cf_info = vkd3d_shader_scan_find_innermost_loop_cf_info(context))) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘breakp’ instruction outside loop."); + return VKD3D_ERROR_INVALID_SHADER; + } + break; + case VKD3DSIH_CONTINUE: + if (!(cf_info = vkd3d_shader_scan_find_innermost_loop_cf_info(context))) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘continue’ instruction outside loop."); + return VKD3D_ERROR_INVALID_SHADER; + } + cf_info->inside_block = false; + break; + case VKD3DSIH_CONTINUEP: + if (!(cf_info = 
vkd3d_shader_scan_find_innermost_loop_cf_info(context))) + { + vkd3d_shader_scan_error(context, VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF, + "Encountered ‘continue’ instruction outside loop."); + return VKD3D_ERROR_INVALID_SHADER; + } + break; + case VKD3DSIH_RET: + if (context->cf_info_count) + context->cf_info[context->cf_info_count - 1].inside_block = false; + break; + default: + break; + } + + if (vkd3d_shader_instruction_is_uav_read(instruction)) + { + for (i = 0; i < instruction->dst_count; ++i) + { + if (instruction->dst[i].reg.type == VKD3DSPR_UAV) + vkd3d_shader_scan_record_uav_read(context, &instruction->dst[i].reg); + } + for (i = 0; i < instruction->src_count; ++i) + { + if (instruction->src[i].reg.type == VKD3DSPR_UAV) + vkd3d_shader_scan_record_uav_read(context, &instruction->src[i].reg); + } + } + + if (vkd3d_shader_instruction_is_uav_counter(instruction)) + vkd3d_shader_scan_record_uav_counter(context, &instruction->src[0].reg); + + ++context->location.line; + return VKD3D_OK; +} + +static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + struct vkd3d_shader_instruction instruction; + struct vkd3d_shader_scan_context context; + int ret; + + if ((scan_descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO))) + { + scan_descriptor_info->descriptors = NULL; + scan_descriptor_info->descriptor_count = 0; + } + + vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); + + if (TRACE_ON()) + { + vkd3d_shader_trace(parser); + vkd3d_shader_parser_reset(parser); + } + + while (!vkd3d_shader_parser_is_end(parser)) + { + vkd3d_shader_parser_read_instruction(parser, &instruction); + + if (instruction.handler_idx == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); + if 
(scan_descriptor_info) + vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); + ret = VKD3D_ERROR_INVALID_SHADER; + goto done; + } + + if ((ret = vkd3d_shader_scan_instruction(&context, &instruction)) < 0) + { + if (scan_descriptor_info) + vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); + goto done; + } + } + + ret = parser->failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; + +done: + vkd3d_shader_scan_context_cleanup(&context); + return ret; +} + +static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; + + if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + return ret; + } + + ret = scan_with_parser(compile_info, message_context, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +} + +static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; + + if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + return ret; + } + + ret = scan_with_parser(compile_info, message_context, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +} + +int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) +{ + struct vkd3d_shader_message_context message_context; + int ret; + + TRACE("compile_info %p, messages %p.\n", compile_info, messages); + + if (messages) + *messages = NULL; + + if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) + return ret; + + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + + switch (compile_info->source_type) + { + case VKD3D_SHADER_SOURCE_DXBC_TPF: + ret = scan_dxbc(compile_info, &message_context); + 
break; + + case VKD3D_SHADER_SOURCE_HLSL: + FIXME("HLSL support not implemented.\n"); + ret = VKD3D_ERROR_NOT_IMPLEMENTED; + break; + + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + ret = scan_d3dbc(compile_info, &message_context); + break; + + default: + ERR("Unsupported source type %#x.\n", compile_info->source_type); + ret = VKD3D_ERROR_INVALID_ARGUMENT; + break; + } + + vkd3d_shader_message_context_trace_messages(&message_context); + if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + vkd3d_shader_message_context_cleanup(&message_context); + return ret; +} + +static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; + struct vkd3d_shader_instruction instruction; + struct vkd3d_shader_compile_info scan_info; + struct vkd3d_dxbc_compiler *spirv_compiler; + struct vkd3d_shader_parser *parser; + int ret; + + scan_info = *compile_info; + scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; + scan_descriptor_info.next = scan_info.next; + scan_info.next = &scan_descriptor_info; + + if ((ret = scan_dxbc(&scan_info, message_context)) < 0) + return ret; + + if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return ret; + } + + vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + + if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) + { + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); + vkd3d_shader_parser_destroy(parser); + return ret; + } + + if (compile_info->target_type == VKD3D_SHADER_TARGET_GLSL) + { + struct 
vkd3d_glsl_generator *glsl_generator; + + if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, + message_context, &parser->location))) + { + ERR("Failed to create GLSL generator.\n"); + vkd3d_shader_parser_destroy(parser); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return VKD3D_ERROR; + } + + ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); + + vkd3d_glsl_generator_destroy(glsl_generator); + vkd3d_shader_parser_destroy(parser); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return ret; + } + + if (!(spirv_compiler = vkd3d_dxbc_compiler_create(&parser->shader_version, &parser->shader_desc, + compile_info, &scan_descriptor_info, message_context, &parser->location))) + { + ERR("Failed to create DXBC compiler.\n"); + vkd3d_shader_parser_destroy(parser); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return VKD3D_ERROR; + } + + while (!vkd3d_shader_parser_is_end(parser)) + { + vkd3d_shader_parser_read_instruction(parser, &instruction); + + if (instruction.handler_idx == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); + ret = VKD3D_ERROR_INVALID_SHADER; + break; + } + + if ((ret = vkd3d_dxbc_compiler_handle_instruction(spirv_compiler, &instruction)) < 0) + break; + } + + if (parser->failed) + ret = VKD3D_ERROR_INVALID_SHADER; + + if (ret >= 0) + ret = vkd3d_dxbc_compiler_generate_spirv(spirv_compiler, compile_info, out); + + vkd3d_dxbc_compiler_destroy(spirv_compiler); + vkd3d_shader_parser_destroy(parser); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return ret; +} + +static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_code preprocessed; + int ret; + + if ((ret = preproc_lexer_parse(compile_info, &preprocessed, message_context))) + return ret; + + ret = 
hlsl_compile_shader(&preprocessed, compile_info, out, message_context); + + vkd3d_shader_free_shader_code(&preprocessed); + return ret; +} + +static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; + + if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + return ret; + } + + vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + + if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) + { + ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); + vkd3d_shader_parser_destroy(parser); + return ret; + } + + return VKD3D_ERROR; +} + +int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages) +{ + struct vkd3d_shader_message_context message_context; + int ret; + + TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages); + + if (messages) + *messages = NULL; + + if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) + return ret; + + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + + switch (compile_info->source_type) + { + case VKD3D_SHADER_SOURCE_DXBC_TPF: + ret = compile_dxbc_tpf(compile_info, out, &message_context); + break; + + case VKD3D_SHADER_SOURCE_HLSL: + ret = compile_hlsl(compile_info, out, &message_context); + break; + + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + ret = compile_d3d_bytecode(compile_info, out, &message_context); + break; + + default: + assert(0); + } + + vkd3d_shader_message_context_trace_messages(&message_context); + if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + vkd3d_shader_message_context_cleanup(&message_context); + 
return ret; +} + +void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info) +{ + TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); + + vkd3d_free(scan_descriptor_info->descriptors); +} + +void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) +{ + TRACE("shader_code %p.\n", shader_code); + + vkd3d_free((void *)shader_code->code); +} + +static void vkd3d_shader_free_root_signature_v_1_0(struct vkd3d_shader_root_signature_desc *root_signature) +{ + unsigned int i; + + for (i = 0; i < root_signature->parameter_count; ++i) + { + const struct vkd3d_shader_root_parameter *parameter = &root_signature->parameters[i]; + + if (parameter->parameter_type == VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + vkd3d_free((void *)parameter->u.descriptor_table.descriptor_ranges); + } + vkd3d_free((void *)root_signature->parameters); + vkd3d_free((void *)root_signature->static_samplers); + + memset(root_signature, 0, sizeof(*root_signature)); +} + +static void vkd3d_shader_free_root_signature_v_1_1(struct vkd3d_shader_root_signature_desc1 *root_signature) +{ + unsigned int i; + + for (i = 0; i < root_signature->parameter_count; ++i) + { + const struct vkd3d_shader_root_parameter1 *parameter = &root_signature->parameters[i]; + + if (parameter->parameter_type == VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + vkd3d_free((void *)parameter->u.descriptor_table.descriptor_ranges); + } + vkd3d_free((void *)root_signature->parameters); + vkd3d_free((void *)root_signature->static_samplers); + + memset(root_signature, 0, sizeof(*root_signature)); +} + +void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signature_desc *desc) +{ + TRACE("desc %p.\n", desc); + + if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + { + vkd3d_shader_free_root_signature_v_1_0(&desc->u.v_1_0); + } + else if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1) + { + 
vkd3d_shader_free_root_signature_v_1_1(&desc->u.v_1_1); + } + else if (desc->version) + { + FIXME("Unknown version %#x.\n", desc->version); + return; + } + + desc->version = 0; +} + +int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_signature *signature, char **messages) +{ + struct vkd3d_shader_message_context message_context; + int ret; + + TRACE("dxbc {%p, %zu}, signature %p, messages %p.\n", dxbc->code, dxbc->size, signature, messages); + + if (messages) + *messages = NULL; + vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); + + ret = shader_parse_input_signature(dxbc->code, dxbc->size, &message_context, signature); + vkd3d_shader_message_context_trace_messages(&message_context); + if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_message_context_cleanup(&message_context); + + return ret; +} + +struct vkd3d_shader_signature_element *vkd3d_shader_find_signature_element( + const struct vkd3d_shader_signature *signature, const char *semantic_name, + unsigned int semantic_index, unsigned int stream_index) +{ + struct vkd3d_shader_signature_element *e; + unsigned int i; + + TRACE("signature %p, semantic_name %s, semantic_index %u, stream_index %u.\n", + signature, debugstr_a(semantic_name), semantic_index, stream_index); + + e = signature->elements; + for (i = 0; i < signature->element_count; ++i) + { + if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) + && e[i].semantic_index == semantic_index + && e[i].stream_index == stream_index) + return &e[i]; + } + + return NULL; +} + +void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature) +{ + TRACE("signature %p.\n", signature); + + vkd3d_free(signature->elements); + signature->elements = NULL; +} + +const char *vkd3d_shader_get_version(unsigned int *major, unsigned int *minor) +{ + int x, y; + + TRACE("major %p, minor %p.\n", major, 
minor); + + if (major || minor) + { + vkd3d_parse_version(PACKAGE_VERSION, &x, &y); + if (major) + *major = x; + if (minor) + *minor = y; + } + + return "vkd3d-shader " PACKAGE_VERSION VKD3D_VCS_ID; +} + +const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(unsigned int *count) +{ + static const enum vkd3d_shader_source_type types[] = + { + VKD3D_SHADER_SOURCE_DXBC_TPF, + VKD3D_SHADER_SOURCE_HLSL, + VKD3D_SHADER_SOURCE_D3D_BYTECODE, + }; + + TRACE("count %p.\n", count); + + *count = ARRAY_SIZE(types); + return types; +} + +const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + enum vkd3d_shader_source_type source_type, unsigned int *count) +{ + static const enum vkd3d_shader_target_type dxbc_tpf_types[] = + { + VKD3D_SHADER_TARGET_SPIRV_BINARY, +#ifdef HAVE_SPIRV_TOOLS + VKD3D_SHADER_TARGET_SPIRV_TEXT, +#endif + VKD3D_SHADER_TARGET_D3D_ASM, +#if 0 + VKD3D_SHADER_TARGET_GLSL, +#endif + }; + + static const enum vkd3d_shader_target_type hlsl_types[] = + { + VKD3D_SHADER_TARGET_D3D_BYTECODE, + VKD3D_SHADER_TARGET_DXBC_TPF, + }; + + static const enum vkd3d_shader_target_type d3dbc_types[] = + { + VKD3D_SHADER_TARGET_D3D_ASM, + }; + + TRACE("source_type %#x, count %p.\n", source_type, count); + + switch (source_type) + { + case VKD3D_SHADER_SOURCE_DXBC_TPF: + *count = ARRAY_SIZE(dxbc_tpf_types); + return dxbc_tpf_types; + + case VKD3D_SHADER_SOURCE_HLSL: + *count = ARRAY_SIZE(hlsl_types); + return hlsl_types; + + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + *count = ARRAY_SIZE(d3dbc_types); + return d3dbc_types; + + default: + *count = 0; + return NULL; + } +} + +int vkd3d_shader_preprocess(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages) +{ + struct vkd3d_shader_message_context message_context; + int ret; + + TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages); + + if (messages) + *messages = NULL; + + if ((ret = 
vkd3d_shader_validate_compile_info(compile_info, false)) < 0) + return ret; + + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + + ret = preproc_lexer_parse(compile_info, out, &message_context); + + vkd3d_shader_message_context_trace_messages(&message_context); + if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + vkd3d_shader_message_context_cleanup(&message_context); + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h new file mode 100644 index 00000000000..0f584d3fbcf --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -0,0 +1,1273 @@ +/* + * Copyright 2017 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + * + * This file incorporates work covered by the following copyright and + * permission notice: + * + * Copyright 2002-2003 The wine-d3d team + * Copyright 2002-2003 2004 Jason Edmeades + * Copyright 2002-2003 Raphael Junqueira + * Copyright 2005 Oliver Stieber + * Copyright 2006 Stefan Dösinger + * Copyright 2006-2011, 2013 Stefan Dösinger for CodeWeavers + * Copyright 2007 Henri Verbeet + * Copyright 2008-2009 Henri Verbeet for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SHADER_PRIVATE_H +#define __VKD3D_SHADER_PRIVATE_H + +#define NONAMELESSUNION +#include "vkd3d_common.h" +#include "vkd3d_memory.h" +#include "vkd3d_shader.h" +#include "wine/list.h" + +#include +#include +#include +#include +#include + +#define VKD3D_VEC4_SIZE 4 +#define VKD3D_DVEC2_SIZE 2 + +enum vkd3d_shader_error +{ + VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE = 1, + VKD3D_SHADER_ERROR_DXBC_INVALID_MAGIC = 2, + VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM = 3, + VKD3D_SHADER_ERROR_DXBC_INVALID_VERSION = 4, + VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_OFFSET = 5, + VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_SIZE = 6, + + VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF = 1000, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE = 1001, + + VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, + VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, + VKD3D_SHADER_ERROR_SPV_INVALID_DESCRIPTOR_BINDING = 2002, + VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED = 2003, + VKD3D_SHADER_ERROR_SPV_STENCIL_EXPORT_UNSUPPORTED = 2004, + + VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, + VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, + VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE = 3002, + VKD3D_SHADER_ERROR_RS_INVALID_DESCRIPTOR_RANGE_TYPE = 3003, + VKD3D_SHADER_ERROR_RS_MIXED_DESCRIPTOR_RANGE_TYPES = 3004, + + VKD3D_SHADER_ERROR_PP_INVALID_SYNTAX = 4000, + VKD3D_SHADER_ERROR_PP_ERROR_DIRECTIVE = 4001, + VKD3D_SHADER_ERROR_PP_INCLUDE_FAILED = 4002, + + VKD3D_SHADER_WARNING_PP_ALREADY_DEFINED = 4300, + VKD3D_SHADER_WARNING_PP_INVALID_DIRECTIVE = 4301, + VKD3D_SHADER_WARNING_PP_ARGUMENT_COUNT_MISMATCH = 4302, + VKD3D_SHADER_WARNING_PP_UNKNOWN_DIRECTIVE = 4303, + VKD3D_SHADER_WARNING_PP_UNTERMINATED_MACRO = 4304, + VKD3D_SHADER_WARNING_PP_UNTERMINATED_IF = 4305, 
+ VKD3D_SHADER_WARNING_PP_DIV_BY_ZERO = 4306, + + VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX = 5000, + VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER = 5001, + VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE = 5002, + VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST = 5003, + VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC = 5004, + VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED = 5005, + VKD3D_SHADER_ERROR_HLSL_REDEFINED = 5006, + VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT = 5007, + VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE = 5008, + VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER = 5009, + VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE = 5010, + VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK = 5011, + VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX = 5012, + VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC = 5013, + VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN = 5014, + VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS = 5015, + VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION = 5016, + VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED = 5017, + VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET = 5018, + VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS = 5019, + VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE = 5020, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + + VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, + + VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF = 7000, + VKD3D_SHADER_ERROR_D3DBC_INVALID_VERSION_TOKEN = 7001, + VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, + VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, + + VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, +}; + +enum vkd3d_shader_opcode +{ + VKD3DSIH_ABS, + VKD3DSIH_ADD, + VKD3DSIH_AND, + VKD3DSIH_ATOMIC_AND, + VKD3DSIH_ATOMIC_CMP_STORE, + VKD3DSIH_ATOMIC_IADD, + VKD3DSIH_ATOMIC_IMAX, + VKD3DSIH_ATOMIC_IMIN, + VKD3DSIH_ATOMIC_OR, + VKD3DSIH_ATOMIC_UMAX, + VKD3DSIH_ATOMIC_UMIN, + VKD3DSIH_ATOMIC_XOR, + VKD3DSIH_BEM, + VKD3DSIH_BFI, + VKD3DSIH_BFREV, + VKD3DSIH_BREAK, + VKD3DSIH_BREAKC, + VKD3DSIH_BREAKP, + VKD3DSIH_BUFINFO, + VKD3DSIH_CALL, + VKD3DSIH_CALLNZ, + VKD3DSIH_CASE, + 
VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, + VKD3DSIH_CMP, + VKD3DSIH_CND, + VKD3DSIH_CONTINUE, + VKD3DSIH_CONTINUEP, + VKD3DSIH_COUNTBITS, + VKD3DSIH_CRS, + VKD3DSIH_CUT, + VKD3DSIH_CUT_STREAM, + VKD3DSIH_DADD, + VKD3DSIH_DCL, + VKD3DSIH_DCL_CONSTANT_BUFFER, + VKD3DSIH_DCL_FUNCTION_BODY, + VKD3DSIH_DCL_FUNCTION_TABLE, + VKD3DSIH_DCL_GLOBAL_FLAGS, + VKD3DSIH_DCL_GS_INSTANCES, + VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, + VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, + VKD3DSIH_DCL_HS_MAX_TESSFACTOR, + VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, + VKD3DSIH_DCL_INDEX_RANGE, + VKD3DSIH_DCL_INDEXABLE_TEMP, + VKD3DSIH_DCL_INPUT, + VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, + VKD3DSIH_DCL_INPUT_PRIMITIVE, + VKD3DSIH_DCL_INPUT_PS, + VKD3DSIH_DCL_INPUT_PS_SGV, + VKD3DSIH_DCL_INPUT_PS_SIV, + VKD3DSIH_DCL_INPUT_SGV, + VKD3DSIH_DCL_INPUT_SIV, + VKD3DSIH_DCL_INTERFACE, + VKD3DSIH_DCL_OUTPUT, + VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, + VKD3DSIH_DCL_OUTPUT_SIV, + VKD3DSIH_DCL_OUTPUT_TOPOLOGY, + VKD3DSIH_DCL_RESOURCE_RAW, + VKD3DSIH_DCL_RESOURCE_STRUCTURED, + VKD3DSIH_DCL_SAMPLER, + VKD3DSIH_DCL_STREAM, + VKD3DSIH_DCL_TEMPS, + VKD3DSIH_DCL_TESSELLATOR_DOMAIN, + VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, + VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, + VKD3DSIH_DCL_TGSM_RAW, + VKD3DSIH_DCL_TGSM_STRUCTURED, + VKD3DSIH_DCL_THREAD_GROUP, + VKD3DSIH_DCL_UAV_RAW, + VKD3DSIH_DCL_UAV_STRUCTURED, + VKD3DSIH_DCL_UAV_TYPED, + VKD3DSIH_DCL_VERTICES_OUT, + VKD3DSIH_DDIV, + VKD3DSIH_DEF, + VKD3DSIH_DEFAULT, + VKD3DSIH_DEFB, + VKD3DSIH_DEFI, + VKD3DSIH_DEQ, + VKD3DSIH_DFMA, + VKD3DSIH_DGE, + VKD3DSIH_DIV, + VKD3DSIH_DLT, + VKD3DSIH_DMAX, + VKD3DSIH_DMIN, + VKD3DSIH_DMOV, + VKD3DSIH_DMOVC, + VKD3DSIH_DMUL, + VKD3DSIH_DNE, + VKD3DSIH_DP2, + VKD3DSIH_DP2ADD, + VKD3DSIH_DP3, + VKD3DSIH_DP4, + VKD3DSIH_DRCP, + VKD3DSIH_DST, + VKD3DSIH_DSX, + VKD3DSIH_DSX_COARSE, + VKD3DSIH_DSX_FINE, + VKD3DSIH_DSY, + VKD3DSIH_DSY_COARSE, + VKD3DSIH_DSY_FINE, + VKD3DSIH_DTOF, + VKD3DSIH_DTOI, + VKD3DSIH_DTOU, + VKD3DSIH_ELSE, + 
VKD3DSIH_EMIT, + VKD3DSIH_EMIT_STREAM, + VKD3DSIH_ENDIF, + VKD3DSIH_ENDLOOP, + VKD3DSIH_ENDREP, + VKD3DSIH_ENDSWITCH, + VKD3DSIH_EQ, + VKD3DSIH_EVAL_CENTROID, + VKD3DSIH_EVAL_SAMPLE_INDEX, + VKD3DSIH_EXP, + VKD3DSIH_EXPP, + VKD3DSIH_F16TOF32, + VKD3DSIH_F32TOF16, + VKD3DSIH_FCALL, + VKD3DSIH_FIRSTBIT_HI, + VKD3DSIH_FIRSTBIT_LO, + VKD3DSIH_FIRSTBIT_SHI, + VKD3DSIH_FRC, + VKD3DSIH_FTOD, + VKD3DSIH_FTOI, + VKD3DSIH_FTOU, + VKD3DSIH_GATHER4, + VKD3DSIH_GATHER4_C, + VKD3DSIH_GATHER4_C_S, + VKD3DSIH_GATHER4_PO, + VKD3DSIH_GATHER4_PO_C, + VKD3DSIH_GATHER4_PO_C_S, + VKD3DSIH_GATHER4_PO_S, + VKD3DSIH_GATHER4_S, + VKD3DSIH_GE, + VKD3DSIH_HS_CONTROL_POINT_PHASE, + VKD3DSIH_HS_DECLS, + VKD3DSIH_HS_FORK_PHASE, + VKD3DSIH_HS_JOIN_PHASE, + VKD3DSIH_IADD, + VKD3DSIH_IBFE, + VKD3DSIH_IEQ, + VKD3DSIH_IF, + VKD3DSIH_IFC, + VKD3DSIH_IGE, + VKD3DSIH_ILT, + VKD3DSIH_IMAD, + VKD3DSIH_IMAX, + VKD3DSIH_IMIN, + VKD3DSIH_IMM_ATOMIC_ALLOC, + VKD3DSIH_IMM_ATOMIC_AND, + VKD3DSIH_IMM_ATOMIC_CMP_EXCH, + VKD3DSIH_IMM_ATOMIC_CONSUME, + VKD3DSIH_IMM_ATOMIC_EXCH, + VKD3DSIH_IMM_ATOMIC_IADD, + VKD3DSIH_IMM_ATOMIC_IMAX, + VKD3DSIH_IMM_ATOMIC_IMIN, + VKD3DSIH_IMM_ATOMIC_OR, + VKD3DSIH_IMM_ATOMIC_UMAX, + VKD3DSIH_IMM_ATOMIC_UMIN, + VKD3DSIH_IMM_ATOMIC_XOR, + VKD3DSIH_IMUL, + VKD3DSIH_INE, + VKD3DSIH_INEG, + VKD3DSIH_ISHL, + VKD3DSIH_ISHR, + VKD3DSIH_ITOD, + VKD3DSIH_ITOF, + VKD3DSIH_LABEL, + VKD3DSIH_LD, + VKD3DSIH_LD2DMS, + VKD3DSIH_LD2DMS_S, + VKD3DSIH_LD_RAW, + VKD3DSIH_LD_RAW_S, + VKD3DSIH_LD_S, + VKD3DSIH_LD_STRUCTURED, + VKD3DSIH_LD_STRUCTURED_S, + VKD3DSIH_LD_UAV_TYPED, + VKD3DSIH_LD_UAV_TYPED_S, + VKD3DSIH_LIT, + VKD3DSIH_LOD, + VKD3DSIH_LOG, + VKD3DSIH_LOGP, + VKD3DSIH_LOOP, + VKD3DSIH_LRP, + VKD3DSIH_LT, + VKD3DSIH_M3x2, + VKD3DSIH_M3x3, + VKD3DSIH_M3x4, + VKD3DSIH_M4x3, + VKD3DSIH_M4x4, + VKD3DSIH_MAD, + VKD3DSIH_MAX, + VKD3DSIH_MIN, + VKD3DSIH_MOV, + VKD3DSIH_MOVA, + VKD3DSIH_MOVC, + VKD3DSIH_MUL, + VKD3DSIH_NE, + VKD3DSIH_NOP, + VKD3DSIH_NOT, + VKD3DSIH_NRM, + VKD3DSIH_OR, + VKD3DSIH_PHASE, 
+ VKD3DSIH_POW, + VKD3DSIH_RCP, + VKD3DSIH_REP, + VKD3DSIH_RESINFO, + VKD3DSIH_RET, + VKD3DSIH_RETP, + VKD3DSIH_ROUND_NE, + VKD3DSIH_ROUND_NI, + VKD3DSIH_ROUND_PI, + VKD3DSIH_ROUND_Z, + VKD3DSIH_RSQ, + VKD3DSIH_SAMPLE, + VKD3DSIH_SAMPLE_B, + VKD3DSIH_SAMPLE_B_CL_S, + VKD3DSIH_SAMPLE_C, + VKD3DSIH_SAMPLE_C_CL_S, + VKD3DSIH_SAMPLE_C_LZ, + VKD3DSIH_SAMPLE_C_LZ_S, + VKD3DSIH_SAMPLE_CL_S, + VKD3DSIH_SAMPLE_GRAD, + VKD3DSIH_SAMPLE_GRAD_CL_S, + VKD3DSIH_SAMPLE_INFO, + VKD3DSIH_SAMPLE_LOD, + VKD3DSIH_SAMPLE_LOD_S, + VKD3DSIH_SAMPLE_POS, + VKD3DSIH_SETP, + VKD3DSIH_SGE, + VKD3DSIH_SGN, + VKD3DSIH_SINCOS, + VKD3DSIH_SLT, + VKD3DSIH_SQRT, + VKD3DSIH_STORE_RAW, + VKD3DSIH_STORE_STRUCTURED, + VKD3DSIH_STORE_UAV_TYPED, + VKD3DSIH_SUB, + VKD3DSIH_SWAPC, + VKD3DSIH_SWITCH, + VKD3DSIH_SYNC, + VKD3DSIH_TEX, + VKD3DSIH_TEXBEM, + VKD3DSIH_TEXBEML, + VKD3DSIH_TEXCOORD, + VKD3DSIH_TEXDEPTH, + VKD3DSIH_TEXDP3, + VKD3DSIH_TEXDP3TEX, + VKD3DSIH_TEXKILL, + VKD3DSIH_TEXLDD, + VKD3DSIH_TEXLDL, + VKD3DSIH_TEXM3x2DEPTH, + VKD3DSIH_TEXM3x2PAD, + VKD3DSIH_TEXM3x2TEX, + VKD3DSIH_TEXM3x3, + VKD3DSIH_TEXM3x3DIFF, + VKD3DSIH_TEXM3x3PAD, + VKD3DSIH_TEXM3x3SPEC, + VKD3DSIH_TEXM3x3TEX, + VKD3DSIH_TEXM3x3VSPEC, + VKD3DSIH_TEXREG2AR, + VKD3DSIH_TEXREG2GB, + VKD3DSIH_TEXREG2RGB, + VKD3DSIH_UBFE, + VKD3DSIH_UDIV, + VKD3DSIH_UGE, + VKD3DSIH_ULT, + VKD3DSIH_UMAX, + VKD3DSIH_UMIN, + VKD3DSIH_UMUL, + VKD3DSIH_USHR, + VKD3DSIH_UTOD, + VKD3DSIH_UTOF, + VKD3DSIH_XOR, + + VKD3DSIH_INVALID, +}; + +enum vkd3d_shader_register_type +{ + VKD3DSPR_TEMP = 0, + VKD3DSPR_INPUT = 1, + VKD3DSPR_CONST = 2, + VKD3DSPR_ADDR = 3, + VKD3DSPR_TEXTURE = 3, + VKD3DSPR_RASTOUT = 4, + VKD3DSPR_ATTROUT = 5, + VKD3DSPR_TEXCRDOUT = 6, + VKD3DSPR_OUTPUT = 6, + VKD3DSPR_CONSTINT = 7, + VKD3DSPR_COLOROUT = 8, + VKD3DSPR_DEPTHOUT = 9, + VKD3DSPR_SAMPLER = 10, + VKD3DSPR_CONST2 = 11, + VKD3DSPR_CONST3 = 12, + VKD3DSPR_CONST4 = 13, + VKD3DSPR_CONSTBOOL = 14, + VKD3DSPR_LOOP = 15, + VKD3DSPR_TEMPFLOAT16 = 16, + VKD3DSPR_MISCTYPE = 17, + 
VKD3DSPR_LABEL = 18, + VKD3DSPR_PREDICATE = 19, + VKD3DSPR_IMMCONST, + VKD3DSPR_IMMCONST64, + VKD3DSPR_CONSTBUFFER, + VKD3DSPR_IMMCONSTBUFFER, + VKD3DSPR_PRIMID, + VKD3DSPR_NULL, + VKD3DSPR_RESOURCE, + VKD3DSPR_UAV, + VKD3DSPR_OUTPOINTID, + VKD3DSPR_FORKINSTID, + VKD3DSPR_JOININSTID, + VKD3DSPR_INCONTROLPOINT, + VKD3DSPR_OUTCONTROLPOINT, + VKD3DSPR_PATCHCONST, + VKD3DSPR_TESSCOORD, + VKD3DSPR_GROUPSHAREDMEM, + VKD3DSPR_THREADID, + VKD3DSPR_THREADGROUPID, + VKD3DSPR_LOCALTHREADID, + VKD3DSPR_LOCALTHREADINDEX, + VKD3DSPR_IDXTEMP, + VKD3DSPR_STREAM, + VKD3DSPR_FUNCTIONBODY, + VKD3DSPR_FUNCTIONPOINTER, + VKD3DSPR_COVERAGE, + VKD3DSPR_SAMPLEMASK, + VKD3DSPR_GSINSTID, + VKD3DSPR_DEPTHOUTGE, + VKD3DSPR_DEPTHOUTLE, + VKD3DSPR_RASTERIZER, + VKD3DSPR_OUTSTENCILREF, + + VKD3DSPR_INVALID = ~0u, +}; + +enum vkd3d_shader_register_precision +{ + VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, + VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, + VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, + VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, + VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, + + VKD3D_SHADER_REGISTER_PRECISION_INVALID = ~0u, +}; + +enum vkd3d_data_type +{ + VKD3D_DATA_FLOAT, + VKD3D_DATA_INT, + VKD3D_DATA_RESOURCE, + VKD3D_DATA_SAMPLER, + VKD3D_DATA_UAV, + VKD3D_DATA_UINT, + VKD3D_DATA_UNORM, + VKD3D_DATA_SNORM, + VKD3D_DATA_OPAQUE, + VKD3D_DATA_MIXED, + VKD3D_DATA_DOUBLE, + VKD3D_DATA_CONTINUED, + VKD3D_DATA_UNUSED, +}; + +enum vkd3d_immconst_type +{ + VKD3D_IMMCONST_SCALAR, + VKD3D_IMMCONST_VEC4, +}; + +enum vkd3d_shader_src_modifier +{ + VKD3DSPSM_NONE = 0, + VKD3DSPSM_NEG = 1, + VKD3DSPSM_BIAS = 2, + VKD3DSPSM_BIASNEG = 3, + VKD3DSPSM_SIGN = 4, + VKD3DSPSM_SIGNNEG = 5, + VKD3DSPSM_COMP = 6, + VKD3DSPSM_X2 = 7, + VKD3DSPSM_X2NEG = 8, + VKD3DSPSM_DZ = 9, + VKD3DSPSM_DW = 10, + VKD3DSPSM_ABS = 11, + VKD3DSPSM_ABSNEG = 12, + VKD3DSPSM_NOT = 13, +}; + +#define VKD3DSP_WRITEMASK_0 0x1u /* .x r */ +#define VKD3DSP_WRITEMASK_1 0x2u /* .y g */ +#define VKD3DSP_WRITEMASK_2 0x4u /* .z b */ 
+#define VKD3DSP_WRITEMASK_3 0x8u /* .w a */ +#define VKD3DSP_WRITEMASK_ALL 0xfu /* all */ + +enum vkd3d_shader_dst_modifier +{ + VKD3DSPDM_NONE = 0, + VKD3DSPDM_SATURATE = 1, + VKD3DSPDM_PARTIALPRECISION = 2, + VKD3DSPDM_MSAMPCENTROID = 4, +}; + +enum vkd3d_shader_interpolation_mode +{ + VKD3DSIM_NONE = 0, + VKD3DSIM_CONSTANT = 1, + VKD3DSIM_LINEAR = 2, + VKD3DSIM_LINEAR_CENTROID = 3, + VKD3DSIM_LINEAR_NOPERSPECTIVE = 4, + VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID = 5, + VKD3DSIM_LINEAR_SAMPLE = 6, + VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE = 7, +}; + +enum vkd3d_shader_global_flags +{ + VKD3DSGF_REFACTORING_ALLOWED = 0x01, + VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = 0x02, + VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL = 0x04, + VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS = 0x08, + VKD3DSGF_SKIP_OPTIMIZATION = 0x10, + VKD3DSGF_ENABLE_MINIMUM_PRECISION = 0x20, + VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS = 0x40, +}; + +enum vkd3d_shader_sync_flags +{ + VKD3DSSF_THREAD_GROUP = 0x1, + VKD3DSSF_GROUP_SHARED_MEMORY = 0x2, + VKD3DSSF_GLOBAL_UAV = 0x8, +}; + +enum vkd3d_shader_uav_flags +{ + VKD3DSUF_GLOBALLY_COHERENT = 0x2, + VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, +}; + +enum vkd3d_tessellator_domain +{ + VKD3D_TESSELLATOR_DOMAIN_LINE = 1, + VKD3D_TESSELLATOR_DOMAIN_TRIANGLE = 2, + VKD3D_TESSELLATOR_DOMAIN_QUAD = 3, +}; + +#define VKD3DSI_NONE 0x0 +#define VKD3DSI_TEXLD_PROJECT 0x1 +#define VKD3DSI_INDEXED_DYNAMIC 0x4 +#define VKD3DSI_RESINFO_RCP_FLOAT 0x1 +#define VKD3DSI_RESINFO_UINT 0x2 +#define VKD3DSI_SAMPLE_INFO_UINT 0x1 +#define VKD3DSI_SAMPLER_COMPARISON_MODE 0x1 + +#define VKD3DSI_PRECISE_X 0x100 +#define VKD3DSI_PRECISE_Y 0x200 +#define VKD3DSI_PRECISE_Z 0x400 +#define VKD3DSI_PRECISE_W 0x800 +#define VKD3DSI_PRECISE_XYZW (VKD3DSI_PRECISE_X | VKD3DSI_PRECISE_Y \ + | VKD3DSI_PRECISE_Z | VKD3DSI_PRECISE_W) +#define VKD3DSI_PRECISE_SHIFT 8 + +enum vkd3d_shader_rel_op +{ + VKD3D_SHADER_REL_OP_GT = 1, + VKD3D_SHADER_REL_OP_EQ = 2, + VKD3D_SHADER_REL_OP_GE = 3, + 
VKD3D_SHADER_REL_OP_LT = 4, + VKD3D_SHADER_REL_OP_NE = 5, + VKD3D_SHADER_REL_OP_LE = 6, +}; + +enum vkd3d_shader_conditional_op +{ + VKD3D_SHADER_CONDITIONAL_OP_NZ = 0, + VKD3D_SHADER_CONDITIONAL_OP_Z = 1 +}; + +#define MAX_IMMEDIATE_CONSTANT_BUFFER_SIZE 4096 +#define MAX_REG_OUTPUT 32 + +enum vkd3d_shader_type +{ + VKD3D_SHADER_TYPE_PIXEL, + VKD3D_SHADER_TYPE_VERTEX, + VKD3D_SHADER_TYPE_GEOMETRY, + VKD3D_SHADER_TYPE_HULL, + VKD3D_SHADER_TYPE_DOMAIN, + VKD3D_SHADER_TYPE_GRAPHICS_COUNT, + + VKD3D_SHADER_TYPE_COMPUTE = VKD3D_SHADER_TYPE_GRAPHICS_COUNT, + + VKD3D_SHADER_TYPE_EFFECT, + VKD3D_SHADER_TYPE_TEXTURE, + VKD3D_SHADER_TYPE_LIBRARY, + VKD3D_SHADER_TYPE_COUNT, +}; + +struct vkd3d_shader_version +{ + enum vkd3d_shader_type type; + uint8_t major; + uint8_t minor; +}; + +struct vkd3d_shader_immediate_constant_buffer +{ + unsigned int vec4_count; + uint32_t data[MAX_IMMEDIATE_CONSTANT_BUFFER_SIZE]; +}; + +struct vkd3d_shader_indexable_temp +{ + struct list entry; + unsigned int register_idx; + unsigned int register_size; + unsigned int component_count; +}; + +struct vkd3d_shader_register_index +{ + const struct vkd3d_shader_src_param *rel_addr; + unsigned int offset; +}; + +struct vkd3d_shader_register +{ + enum vkd3d_shader_register_type type; + enum vkd3d_shader_register_precision precision; + bool non_uniform; + enum vkd3d_data_type data_type; + struct vkd3d_shader_register_index idx[3]; + enum vkd3d_immconst_type immconst_type; + union + { + DWORD immconst_uint[VKD3D_VEC4_SIZE]; + float immconst_float[VKD3D_VEC4_SIZE]; + uint64_t immconst_uint64[VKD3D_DVEC2_SIZE]; + double immconst_double[VKD3D_DVEC2_SIZE]; + unsigned fp_body_idx; + } u; +}; + +struct vkd3d_shader_dst_param +{ + struct vkd3d_shader_register reg; + DWORD write_mask; + DWORD modifiers; + DWORD shift; +}; + +struct vkd3d_shader_src_param +{ + struct vkd3d_shader_register reg; + DWORD swizzle; + enum vkd3d_shader_src_modifier modifiers; +}; + +struct vkd3d_shader_index_range +{ + struct 
vkd3d_shader_dst_param dst; + unsigned int register_count; +}; + +struct vkd3d_shader_register_range +{ + unsigned int space; + unsigned int first, last; +}; + +struct vkd3d_shader_resource +{ + struct vkd3d_shader_dst_param reg; + struct vkd3d_shader_register_range range; +}; + +enum vkd3d_decl_usage +{ + VKD3D_DECL_USAGE_POSITION = 0, + VKD3D_DECL_USAGE_BLEND_WEIGHT = 1, + VKD3D_DECL_USAGE_BLEND_INDICES = 2, + VKD3D_DECL_USAGE_NORMAL = 3, + VKD3D_DECL_USAGE_PSIZE = 4, + VKD3D_DECL_USAGE_TEXCOORD = 5, + VKD3D_DECL_USAGE_TANGENT = 6, + VKD3D_DECL_USAGE_BINORMAL = 7, + VKD3D_DECL_USAGE_TESS_FACTOR = 8, + VKD3D_DECL_USAGE_POSITIONT = 9, + VKD3D_DECL_USAGE_COLOR = 10, + VKD3D_DECL_USAGE_FOG = 11, + VKD3D_DECL_USAGE_DEPTH = 12, + VKD3D_DECL_USAGE_SAMPLE = 13 +}; + +struct vkd3d_shader_semantic +{ + enum vkd3d_decl_usage usage; + unsigned int usage_idx; + enum vkd3d_shader_resource_type resource_type; + enum vkd3d_data_type resource_data_type[VKD3D_VEC4_SIZE]; + struct vkd3d_shader_resource resource; +}; + +enum vkd3d_shader_input_sysval_semantic +{ + VKD3D_SIV_NONE = 0, + VKD3D_SIV_POSITION = 1, + VKD3D_SIV_CLIP_DISTANCE = 2, + VKD3D_SIV_CULL_DISTANCE = 3, + VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX = 4, + VKD3D_SIV_VIEWPORT_ARRAY_INDEX = 5, + VKD3D_SIV_VERTEX_ID = 6, + VKD3D_SIV_PRIMITIVE_ID = 7, + VKD3D_SIV_INSTANCE_ID = 8, + VKD3D_SIV_IS_FRONT_FACE = 9, + VKD3D_SIV_SAMPLE_INDEX = 10, + VKD3D_SIV_QUAD_U0_TESS_FACTOR = 11, + VKD3D_SIV_QUAD_V0_TESS_FACTOR = 12, + VKD3D_SIV_QUAD_U1_TESS_FACTOR = 13, + VKD3D_SIV_QUAD_V1_TESS_FACTOR = 14, + VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR = 15, + VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR = 16, + VKD3D_SIV_TRIANGLE_U_TESS_FACTOR = 17, + VKD3D_SIV_TRIANGLE_V_TESS_FACTOR = 18, + VKD3D_SIV_TRIANGLE_W_TESS_FACTOR = 19, + VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR = 20, + VKD3D_SIV_LINE_DETAIL_TESS_FACTOR = 21, + VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, +}; + +struct vkd3d_shader_desc +{ + const uint32_t *byte_code; + size_t byte_code_size; + struct 
vkd3d_shader_signature input_signature; + struct vkd3d_shader_signature output_signature; + struct vkd3d_shader_signature patch_constant_signature; +}; + +struct vkd3d_shader_register_semantic +{ + struct vkd3d_shader_dst_param reg; + enum vkd3d_shader_input_sysval_semantic sysval_semantic; +}; + +struct vkd3d_shader_sampler +{ + struct vkd3d_shader_src_param src; + struct vkd3d_shader_register_range range; +}; + +struct vkd3d_shader_constant_buffer +{ + struct vkd3d_shader_src_param src; + unsigned int size; + struct vkd3d_shader_register_range range; +}; + +struct vkd3d_shader_structured_resource +{ + struct vkd3d_shader_resource resource; + unsigned int byte_stride; +}; + +struct vkd3d_shader_raw_resource +{ + struct vkd3d_shader_resource resource; +}; + +struct vkd3d_shader_tgsm +{ + unsigned int size; + unsigned int stride; +}; + +struct vkd3d_shader_tgsm_raw +{ + struct vkd3d_shader_dst_param reg; + unsigned int byte_count; +}; + +struct vkd3d_shader_tgsm_structured +{ + struct vkd3d_shader_dst_param reg; + unsigned int byte_stride; + unsigned int structure_count; +}; + +struct vkd3d_shader_thread_group_size +{ + unsigned int x, y, z; +}; + +struct vkd3d_shader_function_table_pointer +{ + unsigned int index; + unsigned int array_size; + unsigned int body_count; + unsigned int table_count; +}; + +struct vkd3d_shader_texel_offset +{ + signed char u, v, w; +}; + +enum vkd3d_primitive_type +{ + VKD3D_PT_UNDEFINED = 0, + VKD3D_PT_POINTLIST = 1, + VKD3D_PT_LINELIST = 2, + VKD3D_PT_LINESTRIP = 3, + VKD3D_PT_TRIANGLELIST = 4, + VKD3D_PT_TRIANGLESTRIP = 5, + VKD3D_PT_TRIANGLEFAN = 6, + VKD3D_PT_LINELIST_ADJ = 10, + VKD3D_PT_LINESTRIP_ADJ = 11, + VKD3D_PT_TRIANGLELIST_ADJ = 12, + VKD3D_PT_TRIANGLESTRIP_ADJ = 13, + VKD3D_PT_PATCH = 14, +}; + +struct vkd3d_shader_primitive_type +{ + enum vkd3d_primitive_type type; + unsigned int patch_vertex_count; +}; + +struct vkd3d_shader_instruction +{ + enum vkd3d_shader_opcode handler_idx; + DWORD flags; + unsigned int dst_count; + 
unsigned int src_count; + const struct vkd3d_shader_dst_param *dst; + const struct vkd3d_shader_src_param *src; + struct vkd3d_shader_texel_offset texel_offset; + enum vkd3d_shader_resource_type resource_type; + unsigned int resource_stride; + enum vkd3d_data_type resource_data_type[VKD3D_VEC4_SIZE]; + bool coissue, structured, raw; + const struct vkd3d_shader_src_param *predicate; + union + { + struct vkd3d_shader_semantic semantic; + struct vkd3d_shader_register_semantic register_semantic; + struct vkd3d_shader_primitive_type primitive_type; + struct vkd3d_shader_dst_param dst; + struct vkd3d_shader_constant_buffer cb; + struct vkd3d_shader_sampler sampler; + unsigned int count; + unsigned int index; + const struct vkd3d_shader_immediate_constant_buffer *icb; + struct vkd3d_shader_raw_resource raw_resource; + struct vkd3d_shader_structured_resource structured_resource; + struct vkd3d_shader_tgsm_raw tgsm_raw; + struct vkd3d_shader_tgsm_structured tgsm_structured; + struct vkd3d_shader_thread_group_size thread_group_size; + enum vkd3d_tessellator_domain tessellator_domain; + enum vkd3d_shader_tessellator_output_primitive tessellator_output_primitive; + enum vkd3d_shader_tessellator_partitioning tessellator_partitioning; + float max_tessellation_factor; + struct vkd3d_shader_index_range index_range; + struct vkd3d_shader_indexable_temp indexable_temp; + struct vkd3d_shader_function_table_pointer fp; + } declaration; +}; + +static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) +{ + return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; +} + +static inline bool vkd3d_shader_register_is_input(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT; +} + +static inline bool vkd3d_shader_register_is_output(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_OUTPUT || 
reg->type == VKD3DSPR_COLOROUT; +} + +struct vkd3d_shader_location +{ + const char *source_name; + unsigned int line, column; +}; + +struct vkd3d_shader_parser +{ + struct vkd3d_shader_message_context *message_context; + struct vkd3d_shader_location location; + bool failed; + + struct vkd3d_shader_desc shader_desc; + struct vkd3d_shader_version shader_version; + const uint32_t *ptr; + const struct vkd3d_shader_parser_ops *ops; +}; + +struct vkd3d_shader_parser_ops +{ + void (*parser_reset)(struct vkd3d_shader_parser *parser); + void (*parser_destroy)(struct vkd3d_shader_parser *parser); + void (*parser_read_instruction)(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *instruction); + bool (*parser_is_end)(struct vkd3d_shader_parser *parser); +}; + +void vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); +void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, + struct vkd3d_shader_message_context *message_context, const char *source_name, + const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops); +void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) 
VKD3D_PRINTF_FUNC(3, 4); + +static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parser) +{ + parser->ops->parser_destroy(parser); +} + +static inline bool vkd3d_shader_parser_is_end(struct vkd3d_shader_parser *parser) +{ + return parser->ops->parser_is_end(parser); +} + +static inline void vkd3d_shader_parser_read_instruction(struct vkd3d_shader_parser *parser, + struct vkd3d_shader_instruction *instruction) +{ + parser->ops->parser_read_instruction(parser, instruction); +} + +static inline void vkd3d_shader_parser_reset(struct vkd3d_shader_parser *parser) +{ + parser->ops->parser_reset(parser); +} + +void vkd3d_shader_trace(struct vkd3d_shader_parser *parser); + +const char *shader_get_type_prefix(enum vkd3d_shader_type type); + +struct vkd3d_string_buffer +{ + char *buffer; + size_t buffer_size, content_size; +}; + +struct vkd3d_string_buffer_cache +{ + struct vkd3d_string_buffer **buffers; + size_t count, max_count, capacity; +}; + +enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out); +void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); +struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); +void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); +void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *list); +void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *list); +int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); +int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); +int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) 
VKD3D_PRINTF_FUNC(2, 3); +void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct vkd3d_string_buffer *buffer); +#define vkd3d_string_buffer_trace(buffer) \ + vkd3d_string_buffer_trace_(buffer, __FUNCTION__) +void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function); +int vkd3d_string_buffer_vprintf(struct vkd3d_string_buffer *buffer, const char *format, va_list args); + +struct vkd3d_bytecode_buffer +{ + uint8_t *data; + size_t size, capacity; + int status; +}; + +size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size); +void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value); + +static inline size_t put_u32(struct vkd3d_bytecode_buffer *buffer, uint32_t value) +{ + return bytecode_put_bytes(buffer, &value, sizeof(value)); +} + +static inline size_t put_f32(struct vkd3d_bytecode_buffer *buffer, float value) +{ + return bytecode_put_bytes(buffer, &value, sizeof(value)); +} + +static inline size_t put_string(struct vkd3d_bytecode_buffer *buffer, const char *string) +{ + return bytecode_put_bytes(buffer, string, strlen(string) + 1); +} + +static inline size_t bytecode_get_size(struct vkd3d_bytecode_buffer *buffer) +{ + return buffer->size; +} + +struct vkd3d_shader_message_context +{ + enum vkd3d_shader_log_level log_level; + struct vkd3d_string_buffer messages; +}; + +void vkd3d_shader_message_context_cleanup(struct vkd3d_shader_message_context *context); +bool vkd3d_shader_message_context_copy_messages(struct vkd3d_shader_message_context *context, char **out); +void vkd3d_shader_message_context_init(struct vkd3d_shader_message_context *context, + enum vkd3d_shader_log_level log_level); +void vkd3d_shader_message_context_trace_messages_(const struct vkd3d_shader_message_context *context, + const char *function); +#define vkd3d_shader_message_context_trace_messages(context) \ + vkd3d_shader_message_context_trace_messages_(context, 
__FUNCTION__) +void vkd3d_shader_error(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(4, 5); +void vkd3d_shader_verror(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *format, va_list args); +void vkd3d_shader_vnote(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_log_level level, const char *format, va_list args); +void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *format, va_list args); + +void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, + enum vkd3d_shader_type shader_type, const struct vkd3d_shader_code *shader); +void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); +#define vkd3d_shader_trace_text(text, size) \ + vkd3d_shader_trace_text_(text, size, __FUNCTION__) + +int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); +int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); + +void free_shader_desc(struct vkd3d_shader_desc *desc); + +int shader_parse_input_signature(const void *dxbc, size_t dxbc_length, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature); + +struct vkd3d_glsl_generator; + +struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, + struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); +int vkd3d_glsl_generator_generate(struct 
vkd3d_glsl_generator *generator, + struct vkd3d_shader_parser *parser, struct vkd3d_shader_code *out); +void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); + +struct vkd3d_dxbc_compiler; + +struct vkd3d_dxbc_compiler *vkd3d_dxbc_compiler_create(const struct vkd3d_shader_version *shader_version, + const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); +int vkd3d_dxbc_compiler_handle_instruction(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_instruction *instruction); +int vkd3d_dxbc_compiler_generate_spirv(struct vkd3d_dxbc_compiler *compiler, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv); +void vkd3d_dxbc_compiler_destroy(struct vkd3d_dxbc_compiler *compiler); + +void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); + +int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + +int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + +static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type( + enum vkd3d_data_type data_type) +{ + switch (data_type) + { + case VKD3D_DATA_FLOAT: + case VKD3D_DATA_UNORM: + case VKD3D_DATA_SNORM: + return VKD3D_SHADER_COMPONENT_FLOAT; + case VKD3D_DATA_UINT: + return VKD3D_SHADER_COMPONENT_UINT; + case VKD3D_DATA_INT: + return VKD3D_SHADER_COMPONENT_INT; + case VKD3D_DATA_DOUBLE: + return VKD3D_SHADER_COMPONENT_DOUBLE; + default: + FIXME("Unhandled data type %#x.\n", data_type); + /* fall-through */ + case VKD3D_DATA_MIXED: + 
return VKD3D_SHADER_COMPONENT_UINT; + } +} + +static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( + enum vkd3d_shader_component_type component_type) +{ + switch (component_type) + { + case VKD3D_SHADER_COMPONENT_FLOAT: + return VKD3D_DATA_FLOAT; + case VKD3D_SHADER_COMPONENT_UINT: + return VKD3D_DATA_UINT; + case VKD3D_SHADER_COMPONENT_INT: + return VKD3D_DATA_INT; + case VKD3D_SHADER_COMPONENT_DOUBLE: + return VKD3D_DATA_DOUBLE; + default: + FIXME("Unhandled component type %#x.\n", component_type); + return VKD3D_DATA_FLOAT; + } +} + +static inline unsigned int vkd3d_write_mask_get_component_idx(DWORD write_mask) +{ + unsigned int i; + + assert(write_mask); + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) + return i; + } + + FIXME("Invalid write mask %#x.\n", write_mask); + return 0; +} + +static inline unsigned int vkd3d_write_mask_component_count(DWORD write_mask) +{ + unsigned int count = vkd3d_popcount(write_mask & VKD3DSP_WRITEMASK_ALL); + assert(1 <= count && count <= VKD3D_VEC4_SIZE); + return count; +} + +static inline unsigned int vkd3d_write_mask_from_component_count(unsigned int component_count) +{ + assert(component_count <= VKD3D_VEC4_SIZE); + return (VKD3DSP_WRITEMASK_0 << component_count) - 1; +} + +static inline unsigned int vkd3d_write_mask_64_from_32(DWORD write_mask32) +{ + unsigned int write_mask64 = write_mask32 | (write_mask32 >> 1); + return (write_mask64 & VKD3DSP_WRITEMASK_0) | ((write_mask64 & VKD3DSP_WRITEMASK_2) >> 1); +} + +static inline unsigned int vkd3d_write_mask_32_from_64(unsigned int write_mask64) +{ + unsigned int write_mask32 = (write_mask64 | (write_mask64 << 1)) + & (VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_2); + return write_mask32 | (write_mask32 << 1); +} + +static inline unsigned int vkd3d_swizzle_get_component(DWORD swizzle, + unsigned int idx) +{ + return (swizzle >> VKD3D_SHADER_SWIZZLE_SHIFT(idx)) & VKD3D_SHADER_SWIZZLE_MASK; +} + +static inline unsigned 
int vkd3d_swizzle_get_component64(DWORD swizzle, + unsigned int idx) +{ + return ((swizzle >> VKD3D_SHADER_SWIZZLE_SHIFT(idx * 2)) & VKD3D_SHADER_SWIZZLE_MASK) / 2u; +} + +static inline unsigned int vkd3d_compact_swizzle(unsigned int swizzle, unsigned int write_mask) +{ + unsigned int i, compacted_swizzle = 0; + + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) + { + compacted_swizzle <<= VKD3D_SHADER_SWIZZLE_SHIFT(1); + compacted_swizzle |= vkd3d_swizzle_get_component(swizzle, i); + } + } + + return compacted_swizzle; +} + +struct vkd3d_struct +{ + enum vkd3d_shader_structure_type type; + const void *next; +}; + +#define vkd3d_find_struct(c, t) vkd3d_find_struct_(c, VKD3D_SHADER_STRUCTURE_TYPE_##t) +static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, + enum vkd3d_shader_structure_type type) +{ + while (chain) + { + if (chain->type == type) + return (void *)chain; + + chain = chain->next; + } + + return NULL; +} + +#define VKD3D_DXBC_MAX_SOURCE_COUNT 6 +#define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) + +#define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') +#define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') +#define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') +#define TAG_ISG1 VKD3D_MAKE_TAG('I', 'S', 'G', '1') +#define TAG_ISGN VKD3D_MAKE_TAG('I', 'S', 'G', 'N') +#define TAG_OSG1 VKD3D_MAKE_TAG('O', 'S', 'G', '1') +#define TAG_OSG5 VKD3D_MAKE_TAG('O', 'S', 'G', '5') +#define TAG_OSGN VKD3D_MAKE_TAG('O', 'S', 'G', 'N') +#define TAG_PCSG VKD3D_MAKE_TAG('P', 'C', 'S', 'G') +#define TAG_PSG1 VKD3D_MAKE_TAG('P', 'S', 'G', '1') +#define TAG_RD11 VKD3D_MAKE_TAG('R', 'D', '1', '1') +#define TAG_RDEF VKD3D_MAKE_TAG('R', 'D', 'E', 'F') +#define TAG_RTS0 VKD3D_MAKE_TAG('R', 'T', 'S', '0') +#define TAG_SHDR VKD3D_MAKE_TAG('S', 'H', 'D', 'R') +#define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') +#define TAG_TEXT VKD3D_MAKE_TAG('T', 'E', 'X', 'T') + +struct dxbc_writer_section +{ + uint32_t tag; + const uint8_t 
*data; + size_t size; +}; + +#define DXBC_MAX_SECTION_COUNT 5 + +struct dxbc_writer +{ + unsigned int section_count; + struct dxbc_writer_section sections[DXBC_MAX_SECTION_COUNT]; +}; + +void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size); +void dxbc_writer_init(struct dxbc_writer *dxbc); +int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); + +#endif /* __VKD3D_SHADER_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c new file mode 100644 index 00000000000..e7375fb805b --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -0,0 +1,6589 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * Copyright 2016 Henri Verbeet for CodeWeavers + * Copyright 2021 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_private.h" + +static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence); + +HRESULT vkd3d_queue_create(struct d3d12_device *device, + uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_queue *object; + int rc; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if ((rc = vkd3d_mutex_init(&object->mutex))) + { + ERR("Failed to initialize mutex, error %d.\n", rc); + vkd3d_free(object); + return hresult_from_errno(rc); + } + + object->completed_sequence_number = 0; + object->submitted_sequence_number = 0; + + object->vk_family_index = family_index; + object->vk_queue_flags = properties->queueFlags; + object->timestamp_bits = properties->timestampValidBits; + + object->semaphores = NULL; + object->semaphores_size = 0; + object->semaphore_count = 0; + + memset(object->old_vk_semaphores, 0, sizeof(object->old_vk_semaphores)); + + VK_CALL(vkGetDeviceQueue(device->vk_device, family_index, 0, &object->vk_queue)); + + TRACE("Created queue %p for queue family index %u.\n", object, family_index); + + *queue = object; + + return S_OK; +} + +void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + unsigned int i; + int rc; + + if ((rc = vkd3d_mutex_lock(&queue->mutex))) + ERR("Failed to lock mutex, error %d.\n", rc); + + for (i = 0; i < queue->semaphore_count; ++i) + VK_CALL(vkDestroySemaphore(device->vk_device, queue->semaphores[i].vk_semaphore, NULL)); + + vkd3d_free(queue->semaphores); + + for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i) + 
{ + if (queue->old_vk_semaphores[i]) + VK_CALL(vkDestroySemaphore(device->vk_device, queue->old_vk_semaphores[i], NULL)); + } + + if (!rc) + vkd3d_mutex_unlock(&queue->mutex); + + vkd3d_mutex_destroy(&queue->mutex); + vkd3d_free(queue); +} + +VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue) +{ + int rc; + + TRACE("queue %p.\n", queue); + + if ((rc = vkd3d_mutex_lock(&queue->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return VK_NULL_HANDLE; + } + + assert(queue->vk_queue); + return queue->vk_queue; +} + +void vkd3d_queue_release(struct vkd3d_queue *queue) +{ + TRACE("queue %p.\n", queue); + + vkd3d_mutex_unlock(&queue->mutex); +} + +static VkResult vkd3d_queue_wait_idle(struct vkd3d_queue *queue, + const struct vkd3d_vk_device_procs *vk_procs) +{ + VkQueue vk_queue; + VkResult vr; + + if ((vk_queue = vkd3d_queue_acquire(queue))) + { + vr = VK_CALL(vkQueueWaitIdle(vk_queue)); + vkd3d_queue_release(queue); + + if (vr < 0) + WARN("Failed to wait for queue, vr %d.\n", vr); + } + else + { + ERR("Failed to acquire queue %p.\n", queue); + vr = VK_ERROR_OUT_OF_HOST_MEMORY; + } + + return vr; +} + +static void vkd3d_queue_update_sequence_number(struct vkd3d_queue *queue, + uint64_t sequence_number, struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + unsigned int destroyed_semaphore_count = 0; + uint64_t completed_sequence_number; + VkSemaphore vk_semaphore; + unsigned int i, j; + int rc; + + if ((rc = vkd3d_mutex_lock(&queue->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + completed_sequence_number = queue->completed_sequence_number; + queue->completed_sequence_number = max(sequence_number, queue->completed_sequence_number); + + TRACE("Queue %p sequence number %"PRIu64" -> %"PRIu64".\n", + queue, completed_sequence_number, queue->completed_sequence_number); + + for (i = 0; i < queue->semaphore_count; ++i) + { + if (queue->semaphores[i].sequence_number > 
queue->completed_sequence_number) + break; + + vk_semaphore = queue->semaphores[i].vk_semaphore; + + /* Try to store the Vulkan semaphore for reuse. */ + for (j = 0; j < ARRAY_SIZE(queue->old_vk_semaphores); ++j) + { + if (queue->old_vk_semaphores[j] == VK_NULL_HANDLE) + { + queue->old_vk_semaphores[j] = vk_semaphore; + vk_semaphore = VK_NULL_HANDLE; + break; + } + } + + if (!vk_semaphore) + continue; + + VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL)); + ++destroyed_semaphore_count; + } + if (i > 0) + { + queue->semaphore_count -= i; + memmove(queue->semaphores, &queue->semaphores[i], queue->semaphore_count * sizeof(*queue->semaphores)); + } + + if (destroyed_semaphore_count) + TRACE("Destroyed %u Vulkan semaphores.\n", destroyed_semaphore_count); + + vkd3d_mutex_unlock(&queue->mutex); +} + +static uint64_t vkd3d_queue_reset_sequence_number_locked(struct vkd3d_queue *queue) +{ + unsigned int i; + + WARN("Resetting sequence number for queue %p.\n", queue); + + queue->completed_sequence_number = 0; + queue->submitted_sequence_number = 1; + + for (i = 0; i < queue->semaphore_count; ++i) + queue->semaphores[i].sequence_number = queue->submitted_sequence_number; + + return queue->submitted_sequence_number; +} + +static VkResult vkd3d_queue_create_vk_semaphore_locked(struct vkd3d_queue *queue, + struct d3d12_device *device, VkSemaphore *vk_semaphore) +{ + const struct vkd3d_vk_device_procs *vk_procs; + VkSemaphoreCreateInfo semaphore_info; + unsigned int i; + VkResult vr; + + *vk_semaphore = VK_NULL_HANDLE; + + for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i) + { + if ((*vk_semaphore = queue->old_vk_semaphores[i])) + { + queue->old_vk_semaphores[i] = VK_NULL_HANDLE; + break; + } + } + + if (*vk_semaphore) + return VK_SUCCESS; + + vk_procs = &device->vk_procs; + + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_info.pNext = NULL; + semaphore_info.flags = 0; + if ((vr = VK_CALL(vkCreateSemaphore(device->vk_device, 
&semaphore_info, NULL, vk_semaphore))) < 0) + { + WARN("Failed to create Vulkan semaphore, vr %d.\n", vr); + *vk_semaphore = VK_NULL_HANDLE; + } + + return vr; +} + +/* Fence worker thread */ +static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker, + VkFence vk_fence, struct d3d12_fence *fence, uint64_t value, + struct vkd3d_queue *queue, uint64_t queue_sequence_number) +{ + struct vkd3d_waiting_fence *waiting_fence; + int rc; + + TRACE("worker %p, fence %p, value %#"PRIx64".\n", worker, fence, value); + + if ((rc = vkd3d_mutex_lock(&worker->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size, + worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences))) + { + ERR("Failed to add GPU fence.\n"); + vkd3d_mutex_unlock(&worker->mutex); + return E_OUTOFMEMORY; + } + + worker->enqueued_fences[worker->enqueued_fence_count].vk_fence = vk_fence; + waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence; + waiting_fence->fence = fence; + waiting_fence->value = value; + waiting_fence->queue = queue; + waiting_fence->queue_sequence_number = queue_sequence_number; + ++worker->enqueued_fence_count; + + InterlockedIncrement(&fence->pending_worker_operation_count); + + vkd3d_cond_signal(&worker->cond); + vkd3d_mutex_unlock(&worker->mutex); + + return S_OK; +} + +static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, struct d3d12_fence *fence) +{ + LONG count; + int rc; + + if (!(count = InterlockedAdd(&fence->pending_worker_operation_count, 0))) + return; + + WARN("Waiting for %u pending fence operations (fence %p).\n", count, fence); + + if ((rc = vkd3d_mutex_lock(&worker->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + while ((count = InterlockedAdd(&fence->pending_worker_operation_count, 0))) + { + TRACE("Still waiting for %u 
pending fence operations (fence %p).\n", count, fence); + + worker->pending_fence_destruction = true; + vkd3d_cond_signal(&worker->cond); + + vkd3d_cond_wait(&worker->fence_destruction_cond, &worker->mutex); + } + + TRACE("Removed fence %p.\n", fence); + + vkd3d_mutex_unlock(&worker->mutex); +} + +static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker) +{ + unsigned int i; + size_t count; + bool ret; + + if (!worker->enqueued_fence_count) + return; + + count = worker->fence_count + worker->enqueued_fence_count; + + ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size, + count, sizeof(*worker->vk_fences)); + ret &= vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size, + count, sizeof(*worker->fences)); + if (!ret) + { + ERR("Failed to reserve memory.\n"); + return; + } + + for (i = 0; i < worker->enqueued_fence_count; ++i) + { + struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i]; + + worker->vk_fences[worker->fence_count] = current->vk_fence; + worker->fences[worker->fence_count] = current->waiting_fence; + ++worker->fence_count; + } + assert(worker->fence_count == count); + worker->enqueued_fence_count = 0; +} + +static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker) +{ + struct d3d12_device *device = worker->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + unsigned int i, j; + VkFence vk_fence; + HRESULT hr; + int vr; + + if (!worker->fence_count) + return; + + vr = VK_CALL(vkWaitForFences(device->vk_device, + worker->fence_count, worker->vk_fences, VK_FALSE, ~(uint64_t)0)); + if (vr == VK_TIMEOUT) + return; + if (vr != VK_SUCCESS) + { + ERR("Failed to wait for Vulkan fences, vr %d.\n", vr); + return; + } + + for (i = 0, j = 0; i < worker->fence_count; ++i) + { + vk_fence = worker->vk_fences[i]; + if (!(vr = VK_CALL(vkGetFenceStatus(device->vk_device, vk_fence)))) + { + struct vkd3d_waiting_fence *current = 
&worker->fences[i]; + + TRACE("Signaling fence %p value %#"PRIx64".\n", current->fence, current->value); + if (FAILED(hr = d3d12_fence_signal(current->fence, current->value, vk_fence))) + ERR("Failed to signal D3D12 fence, hr %#x.\n", hr); + + InterlockedDecrement(¤t->fence->pending_worker_operation_count); + + vkd3d_queue_update_sequence_number(current->queue, current->queue_sequence_number, device); + continue; + } + + if (vr != VK_NOT_READY) + ERR("Failed to get Vulkan fence status, vr %d.\n", vr); + + if (i != j) + { + worker->vk_fences[j] = worker->vk_fences[i]; + worker->fences[j] = worker->fences[i]; + } + ++j; + } + worker->fence_count = j; +} + +static void *vkd3d_fence_worker_main(void *arg) +{ + struct vkd3d_fence_worker *worker = arg; + int rc; + + vkd3d_set_thread_name("vkd3d_fence"); + + for (;;) + { + vkd3d_wait_for_gpu_fences(worker); + + if (!worker->fence_count || InterlockedAdd(&worker->enqueued_fence_count, 0)) + { + if ((rc = vkd3d_mutex_lock(&worker->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + break; + } + + if (worker->pending_fence_destruction) + { + vkd3d_cond_broadcast(&worker->fence_destruction_cond); + worker->pending_fence_destruction = false; + } + + if (worker->enqueued_fence_count) + { + vkd3d_fence_worker_move_enqueued_fences_locked(worker); + } + else + { + if (worker->should_exit) + { + vkd3d_mutex_unlock(&worker->mutex); + break; + } + + if ((rc = vkd3d_cond_wait(&worker->cond, &worker->mutex))) + { + ERR("Failed to wait on condition variable, error %d.\n", rc); + vkd3d_mutex_unlock(&worker->mutex); + break; + } + } + + vkd3d_mutex_unlock(&worker->mutex); + } + } + + return NULL; +} + +HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, + struct d3d12_device *device) +{ + HRESULT hr; + int rc; + + TRACE("worker %p.\n", worker); + + worker->should_exit = false; + worker->pending_fence_destruction = false; + worker->device = device; + + worker->enqueued_fence_count = 0; + worker->enqueued_fences = 
NULL; + worker->enqueued_fences_size = 0; + + worker->fence_count = 0; + + worker->vk_fences = NULL; + worker->vk_fences_size = 0; + worker->fences = NULL; + worker->fences_size = 0; + + if ((rc = vkd3d_mutex_init(&worker->mutex))) + { + ERR("Failed to initialize mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if ((rc = vkd3d_cond_init(&worker->cond))) + { + ERR("Failed to initialize condition variable, error %d.\n", rc); + vkd3d_mutex_destroy(&worker->mutex); + return hresult_from_errno(rc); + } + + if ((rc = vkd3d_cond_init(&worker->fence_destruction_cond))) + { + ERR("Failed to initialize condition variable, error %d.\n", rc); + vkd3d_mutex_destroy(&worker->mutex); + vkd3d_cond_destroy(&worker->cond); + return hresult_from_errno(rc); + } + + if (FAILED(hr = vkd3d_create_thread(device->vkd3d_instance, + vkd3d_fence_worker_main, worker, &worker->thread))) + { + vkd3d_mutex_destroy(&worker->mutex); + vkd3d_cond_destroy(&worker->cond); + vkd3d_cond_destroy(&worker->fence_destruction_cond); + } + + return hr; +} + +HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, + struct d3d12_device *device) +{ + HRESULT hr; + int rc; + + TRACE("worker %p.\n", worker); + + if ((rc = vkd3d_mutex_lock(&worker->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + worker->should_exit = true; + vkd3d_cond_signal(&worker->cond); + + vkd3d_mutex_unlock(&worker->mutex); + + if (FAILED(hr = vkd3d_join_thread(device->vkd3d_instance, &worker->thread))) + return hr; + + vkd3d_mutex_destroy(&worker->mutex); + vkd3d_cond_destroy(&worker->cond); + vkd3d_cond_destroy(&worker->fence_destruction_cond); + + vkd3d_free(worker->enqueued_fences); + vkd3d_free(worker->vk_fences); + vkd3d_free(worker->fences); + + return S_OK; +} + +static const struct d3d12_root_parameter *root_signature_get_parameter( + const struct d3d12_root_signature *root_signature, unsigned int index) +{ + assert(index < 
root_signature->parameter_count); + return &root_signature->parameters[index]; +} + +static const struct d3d12_root_descriptor_table *root_signature_get_descriptor_table( + const struct d3d12_root_signature *root_signature, unsigned int index) +{ + const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); + assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE); + return &p->u.descriptor_table; +} + +static const struct d3d12_root_constant *root_signature_get_32bit_constants( + const struct d3d12_root_signature *root_signature, unsigned int index) +{ + const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); + assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); + return &p->u.constant; +} + +static const struct d3d12_root_parameter *root_signature_get_root_descriptor( + const struct d3d12_root_signature *root_signature, unsigned int index) +{ + const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); + assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV + || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV + || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV); + return p; +} + +/* ID3D12Fence */ +static struct d3d12_fence *impl_from_ID3D12Fence(ID3D12Fence *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence_iface); +} + +static VkResult d3d12_fence_create_vk_fence(struct d3d12_fence *fence, VkFence *vk_fence) +{ + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_device *device = fence->device; + VkFenceCreateInfo fence_info; + unsigned int i; + VkResult vr; + int rc; + + *vk_fence = VK_NULL_HANDLE; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + goto create_fence; + } + + for (i = 0; i < ARRAY_SIZE(fence->old_vk_fences); ++i) + { + if ((*vk_fence = fence->old_vk_fences[i])) + { + fence->old_vk_fences[i] = VK_NULL_HANDLE; + 
break; + } + } + + vkd3d_mutex_unlock(&fence->mutex); + + if (*vk_fence) + return VK_SUCCESS; + +create_fence: + vk_procs = &device->vk_procs; + + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.pNext = NULL; + fence_info.flags = 0; + if ((vr = VK_CALL(vkCreateFence(device->vk_device, &fence_info, NULL, vk_fence))) < 0) + { + WARN("Failed to create Vulkan fence, vr %d.\n", vr); + *vk_fence = VK_NULL_HANDLE; + } + + return vr; +} + +static void d3d12_fence_garbage_collect_vk_semaphores_locked(struct d3d12_fence *fence, + bool destroy_all) +{ + struct d3d12_device *device = fence->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_signaled_semaphore *current, *p; + unsigned int semaphore_count; + + semaphore_count = fence->semaphore_count; + if (!destroy_all && semaphore_count < VKD3D_MAX_VK_SYNC_OBJECTS) + return; + + LIST_FOR_EACH_ENTRY_SAFE(current, p, &fence->semaphores, struct vkd3d_signaled_semaphore, entry) + { + if (!destroy_all && fence->semaphore_count < VKD3D_MAX_VK_SYNC_OBJECTS) + break; + + /* The semaphore doesn't have a pending signal operation if the fence + * was signaled. 
*/ + if ((current->vk_fence || current->is_acquired) && !destroy_all) + continue; + + if (current->vk_fence) + WARN("Destroying potentially pending semaphore.\n"); + assert(!current->is_acquired); + + VK_CALL(vkDestroySemaphore(device->vk_device, current->vk_semaphore, NULL)); + list_remove(¤t->entry); + vkd3d_free(current); + + --fence->semaphore_count; + } + + if (semaphore_count != fence->semaphore_count) + TRACE("Destroyed %u Vulkan semaphores.\n", semaphore_count - fence->semaphore_count); +} + +static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence) +{ + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_device *device = fence->device; + unsigned int i; + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + vk_procs = &device->vk_procs; + + for (i = 0; i < ARRAY_SIZE(fence->old_vk_fences); ++i) + { + if (fence->old_vk_fences[i]) + VK_CALL(vkDestroyFence(device->vk_device, fence->old_vk_fences[i], NULL)); + fence->old_vk_fences[i] = VK_NULL_HANDLE; + } + + d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true); + + vkd3d_mutex_unlock(&fence->mutex); +} + +static struct vkd3d_signaled_semaphore *d3d12_fence_acquire_vk_semaphore(struct d3d12_fence *fence, + uint64_t value, uint64_t *completed_value) +{ + struct vkd3d_signaled_semaphore *semaphore; + struct vkd3d_signaled_semaphore *current; + uint64_t semaphore_value; + int rc; + + TRACE("fence %p, value %#"PRIx64".\n", fence, value); + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return VK_NULL_HANDLE; + } + + semaphore = NULL; + semaphore_value = ~(uint64_t)0; + + LIST_FOR_EACH_ENTRY(current, &fence->semaphores, struct vkd3d_signaled_semaphore, entry) + { + /* Prefer a semaphore with the smallest value. 
*/ + if (!current->is_acquired && current->value >= value && semaphore_value >= current->value) + { + semaphore = current; + semaphore_value = current->value; + } + if (semaphore_value == value) + break; + } + + if (semaphore) + semaphore->is_acquired = true; + + *completed_value = fence->value; + + vkd3d_mutex_unlock(&fence->mutex); + + return semaphore; +} + +static void d3d12_fence_remove_vk_semaphore(struct d3d12_fence *fence, struct vkd3d_signaled_semaphore *semaphore) +{ + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + assert(semaphore->is_acquired); + + list_remove(&semaphore->entry); + vkd3d_free(semaphore); + + --fence->semaphore_count; + + vkd3d_mutex_unlock(&fence->mutex); +} + +static void d3d12_fence_release_vk_semaphore(struct d3d12_fence *fence, struct vkd3d_signaled_semaphore *semaphore) +{ + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + assert(semaphore->is_acquired); + semaphore->is_acquired = false; + + vkd3d_mutex_unlock(&fence->mutex); +} + +static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence, + VkSemaphore vk_semaphore, VkFence vk_fence, uint64_t value) +{ + struct vkd3d_signaled_semaphore *semaphore; + HRESULT hr = S_OK; + int rc; + + TRACE("fence %p, value %#"PRIx64".\n", fence, value); + + if (!(semaphore = vkd3d_malloc(sizeof(*semaphore)))) + { + ERR("Failed to add semaphore.\n"); + return E_OUTOFMEMORY; + } + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + vkd3d_free(semaphore); + return E_FAIL; + } + + d3d12_fence_garbage_collect_vk_semaphores_locked(fence, false); + + semaphore->value = value; + semaphore->vk_semaphore = vk_semaphore; + semaphore->vk_fence = vk_fence; + semaphore->is_acquired = false; + + list_add_tail(&fence->semaphores, &semaphore->entry); + ++fence->semaphore_count; + + 
vkd3d_mutex_unlock(&fence->mutex); + + return hr; +} + +static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence) +{ + struct d3d12_device *device = fence->device; + struct vkd3d_signaled_semaphore *current; + bool signal_null_event_cond = false; + unsigned int i, j; + int rc; + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + fence->value = value; + + for (i = 0, j = 0; i < fence->event_count; ++i) + { + struct vkd3d_waiting_event *current = &fence->events[i]; + + if (current->value <= value) + { + if (current->event) + { + fence->device->signal_event(current->event); + } + else + { + current->latch = true; + signal_null_event_cond = true; + } + } + else + { + if (i != j) + fence->events[j] = *current; + ++j; + } + } + fence->event_count = j; + + if (signal_null_event_cond) + vkd3d_cond_broadcast(&fence->null_event_cond); + + if (vk_fence) + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + LIST_FOR_EACH_ENTRY(current, &fence->semaphores, struct vkd3d_signaled_semaphore, entry) + { + if (current->vk_fence == vk_fence) + current->vk_fence = VK_NULL_HANDLE; + } + + for (i = 0; i < ARRAY_SIZE(fence->old_vk_fences); ++i) + { + if (fence->old_vk_fences[i] == VK_NULL_HANDLE) + { + fence->old_vk_fences[i] = vk_fence; + VK_CALL(vkResetFences(device->vk_device, 1, &vk_fence)); + vk_fence = VK_NULL_HANDLE; + break; + } + } + if (vk_fence) + VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL)); + } + + vkd3d_mutex_unlock(&fence->mutex); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, + REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + if (IsEqualGUID(riid, &IID_ID3D12Fence) + || IsEqualGUID(riid, &IID_ID3D12Pageable) + || IsEqualGUID(riid, &IID_ID3D12DeviceChild) + || IsEqualGUID(riid, 
&IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { + ID3D12Fence_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence *iface) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + ULONG refcount = InterlockedIncrement(&fence->refcount); + + TRACE("%p increasing refcount to %u.\n", fence, refcount); + + return refcount; +} + +static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence *iface) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + ULONG refcount = InterlockedDecrement(&fence->refcount); + int rc; + + TRACE("%p decreasing refcount to %u.\n", fence, refcount); + + if (!refcount) + { + struct d3d12_device *device = fence->device; + + vkd3d_private_store_destroy(&fence->private_store); + + vkd3d_fence_worker_remove_fence(&device->fence_worker, fence); + + d3d12_fence_destroy_vk_objects(fence); + + vkd3d_free(fence->events); + if ((rc = vkd3d_mutex_destroy(&fence->mutex))) + ERR("Failed to destroy mutex, error %d.\n", rc); + vkd3d_cond_destroy(&fence->null_event_cond); + vkd3d_free(fence); + + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", + iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&fence->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", + iface, debugstr_guid(guid), data_size, data); + + return 
vkd3d_set_private_data(&fence->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&fence->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence *iface, const WCHAR *name) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, fence->device->wchar_size)); + + return name ? S_OK : E_INVALIDARG; +} + +static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence *iface, REFIID iid, void **device) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(fence->device, iid, device); +} + +static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + uint64_t completed_value; + int rc; + + TRACE("iface %p.\n", iface); + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return 0; + } + completed_value = fence->value; + vkd3d_mutex_unlock(&fence->mutex); + return completed_value; +} + +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *iface, + UINT64 value, HANDLE event) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + unsigned int i; + bool *latch; + int rc; + + TRACE("iface %p, value %#"PRIx64", event %p.\n", iface, value, event); + + if ((rc = vkd3d_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if (value <= fence->value) + { + if (event) + fence->device->signal_event(event); + 
vkd3d_mutex_unlock(&fence->mutex); + return S_OK; + } + + for (i = 0; i < fence->event_count; ++i) + { + struct vkd3d_waiting_event *current = &fence->events[i]; + if (current->value == value && current->event == event) + { + WARN("Event completion for (%p, %#"PRIx64") is already in the list.\n", + event, value); + vkd3d_mutex_unlock(&fence->mutex); + return S_OK; + } + } + + if (!vkd3d_array_reserve((void **)&fence->events, &fence->events_size, + fence->event_count + 1, sizeof(*fence->events))) + { + WARN("Failed to add event.\n"); + vkd3d_mutex_unlock(&fence->mutex); + return E_OUTOFMEMORY; + } + + fence->events[fence->event_count].value = value; + fence->events[fence->event_count].event = event; + fence->events[fence->event_count].latch = false; + latch = &fence->events[fence->event_count].latch; + ++fence->event_count; + + /* If event is NULL, we need to block until the fence value completes. + * Implement this in a uniform way where we pretend we have a dummy event. + * A NULL fence->events[].event means that we should set latch to true + * and signal a condition variable instead of calling external signal_event callback. 
*/ + if (!event) + { + while (!*latch) + vkd3d_cond_wait(&fence->null_event_cond, &fence->mutex); + } + + vkd3d_mutex_unlock(&fence->mutex); + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + + TRACE("iface %p, value %#"PRIx64".\n", iface, value); + + return d3d12_fence_signal(fence, value, VK_NULL_HANDLE); +} + +static const struct ID3D12FenceVtbl d3d12_fence_vtbl = +{ + /* IUnknown methods */ + d3d12_fence_QueryInterface, + d3d12_fence_AddRef, + d3d12_fence_Release, + /* ID3D12Object methods */ + d3d12_fence_GetPrivateData, + d3d12_fence_SetPrivateData, + d3d12_fence_SetPrivateDataInterface, + d3d12_fence_SetName, + /* ID3D12DeviceChild methods */ + d3d12_fence_GetDevice, + /* ID3D12Fence methods */ + d3d12_fence_GetCompletedValue, + d3d12_fence_SetEventOnCompletion, + d3d12_fence_Signal, +}; + +static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_fence_vtbl); + return impl_from_ID3D12Fence(iface); +} + +static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, + UINT64 initial_value, D3D12_FENCE_FLAGS flags) +{ + HRESULT hr; + int rc; + + fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl; + fence->refcount = 1; + + fence->value = initial_value; + + if ((rc = vkd3d_mutex_init(&fence->mutex))) + { + ERR("Failed to initialize mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if ((rc = vkd3d_cond_init(&fence->null_event_cond))) + { + ERR("Failed to initialize cond variable, error %d.\n", rc); + vkd3d_mutex_destroy(&fence->mutex); + return hresult_from_errno(rc); + } + + if (flags) + FIXME("Ignoring flags %#x.\n", flags); + + fence->events = NULL; + fence->events_size = 0; + fence->event_count = 0; + + list_init(&fence->semaphores); + fence->semaphore_count = 0; + + memset(fence->old_vk_fences, 0, 
sizeof(fence->old_vk_fences)); + + fence->pending_worker_operation_count = 0; + + if (FAILED(hr = vkd3d_private_store_init(&fence->private_store))) + { + vkd3d_mutex_destroy(&fence->mutex); + vkd3d_cond_destroy(&fence->null_event_cond); + return hr; + } + + d3d12_device_add_ref(fence->device = device); + + return S_OK; +} + +HRESULT d3d12_fence_create(struct d3d12_device *device, + uint64_t initial_value, D3D12_FENCE_FLAGS flags, struct d3d12_fence **fence) +{ + struct d3d12_fence *object; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + d3d12_fence_init(object, device, initial_value, flags); + + TRACE("Created fence %p.\n", object); + + *fence = object; + + return S_OK; +} + +/* Command buffers */ +static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list, + const char *message, ...) +{ + va_list args; + + va_start(args, message); + WARN("Command list %p is invalid: \"%s\".\n", list, vkd3d_dbg_vsprintf(message, args)); + va_end(args); + + list->is_valid = false; +} + +static HRESULT d3d12_command_list_begin_command_buffer(struct d3d12_command_list *list) +{ + struct d3d12_device *device = list->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkCommandBufferBeginInfo begin_info; + VkResult vr; + + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.pNext = NULL; + begin_info.flags = 0; + begin_info.pInheritanceInfo = NULL; + + if ((vr = VK_CALL(vkBeginCommandBuffer(list->vk_command_buffer, &begin_info))) < 0) + { + WARN("Failed to begin command buffer, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + list->is_recording = true; + list->is_valid = true; + + return S_OK; +} + +static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_command_allocator *allocator, + struct d3d12_command_list *list) +{ + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + 
VkCommandBufferAllocateInfo command_buffer_info; + VkResult vr; + HRESULT hr; + + TRACE("allocator %p, list %p.\n", allocator, list); + + if (allocator->current_command_list) + { + WARN("Command allocator is already in use.\n"); + return E_INVALIDARG; + } + + command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + command_buffer_info.pNext = NULL; + command_buffer_info.commandPool = allocator->vk_command_pool; + command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + command_buffer_info.commandBufferCount = 1; + + if ((vr = VK_CALL(vkAllocateCommandBuffers(device->vk_device, &command_buffer_info, + &list->vk_command_buffer))) < 0) + { + WARN("Failed to allocate Vulkan command buffer, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + list->vk_queue_flags = allocator->vk_queue_flags; + + if (FAILED(hr = d3d12_command_list_begin_command_buffer(list))) + { + VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, + 1, &list->vk_command_buffer)); + return hr; + } + + allocator->current_command_list = list; + + return S_OK; +} + +static void d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator, + struct d3d12_command_list *list) +{ + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + TRACE("allocator %p, list %p.\n", allocator, list); + + if (allocator->current_command_list == list) + allocator->current_command_list = NULL; + + if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size, + allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers))) + { + WARN("Failed to add command buffer.\n"); + VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, + 1, &list->vk_command_buffer)); + return; + } + + allocator->command_buffers[allocator->command_buffer_count++] = list->vk_command_buffer; +} + +static bool 
d3d12_command_allocator_add_render_pass(struct d3d12_command_allocator *allocator, VkRenderPass pass) +{ + if (!vkd3d_array_reserve((void **)&allocator->passes, &allocator->passes_size, + allocator->pass_count + 1, sizeof(*allocator->passes))) + return false; + + allocator->passes[allocator->pass_count++] = pass; + + return true; +} + +static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocator *allocator, + VkFramebuffer framebuffer) +{ + if (!vkd3d_array_reserve((void **)&allocator->framebuffers, &allocator->framebuffers_size, + allocator->framebuffer_count + 1, sizeof(*allocator->framebuffers))) + return false; + + allocator->framebuffers[allocator->framebuffer_count++] = framebuffer; + + return true; +} + +static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator, + VkDescriptorPool pool) +{ + if (!vkd3d_array_reserve((void **)&allocator->descriptor_pools, &allocator->descriptor_pools_size, + allocator->descriptor_pool_count + 1, sizeof(*allocator->descriptor_pools))) + return false; + + allocator->descriptor_pools[allocator->descriptor_pool_count++] = pool; + + return true; +} + +static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator, + struct vkd3d_view *view) +{ + if (!vkd3d_array_reserve((void **)&allocator->views, &allocator->views_size, + allocator->view_count + 1, sizeof(*allocator->views))) + return false; + + vkd3d_view_incref(view); + allocator->views[allocator->view_count++] = view; + + return true; +} + +static bool d3d12_command_allocator_add_buffer_view(struct d3d12_command_allocator *allocator, + VkBufferView view) +{ + if (!vkd3d_array_reserve((void **)&allocator->buffer_views, &allocator->buffer_views_size, + allocator->buffer_view_count + 1, sizeof(*allocator->buffer_views))) + return false; + + allocator->buffer_views[allocator->buffer_view_count++] = view; + + return true; +} + +static bool d3d12_command_allocator_add_transfer_buffer(struct 
d3d12_command_allocator *allocator, + const struct vkd3d_buffer *buffer) +{ + if (!vkd3d_array_reserve((void **)&allocator->transfer_buffers, &allocator->transfer_buffers_size, + allocator->transfer_buffer_count + 1, sizeof(*allocator->transfer_buffers))) + return false; + + allocator->transfer_buffers[allocator->transfer_buffer_count++] = *buffer; + + return true; +} + +static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( + struct d3d12_command_allocator *allocator) +{ + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct VkDescriptorPoolCreateInfo pool_desc; + VkDevice vk_device = device->vk_device; + VkDescriptorPool vk_pool; + VkResult vr; + + if (allocator->free_descriptor_pool_count > 0) + { + vk_pool = allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1]; + allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1] = VK_NULL_HANDLE; + --allocator->free_descriptor_pool_count; + } + else + { + pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_desc.pNext = NULL; + pool_desc.flags = 0; + pool_desc.maxSets = 512; + pool_desc.poolSizeCount = ARRAY_SIZE(device->vk_pool_sizes); + pool_desc.pPoolSizes = device->vk_pool_sizes; + if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) + { + ERR("Failed to create descriptor pool, vr %d.\n", vr); + return VK_NULL_HANDLE; + } + } + + if (!(d3d12_command_allocator_add_descriptor_pool(allocator, vk_pool))) + { + ERR("Failed to add descriptor pool.\n"); + VK_CALL(vkDestroyDescriptorPool(vk_device, vk_pool, NULL)); + return VK_NULL_HANDLE; + } + + return vk_pool; +} + +static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( + struct d3d12_command_allocator *allocator, VkDescriptorSetLayout vk_set_layout, + unsigned int variable_binding_size, bool unbounded) +{ + struct d3d12_device *device = allocator->device; + const struct 
vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; + struct VkDescriptorSetAllocateInfo set_desc; + VkDevice vk_device = device->vk_device; + VkDescriptorSet vk_descriptor_set; + VkResult vr; + + if (!allocator->vk_descriptor_pool) + allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); + if (!allocator->vk_descriptor_pool) + return VK_NULL_HANDLE; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_desc.pNext = NULL; + set_desc.descriptorPool = allocator->vk_descriptor_pool; + set_desc.descriptorSetCount = 1; + set_desc.pSetLayouts = &vk_set_layout; + if (unbounded) + { + set_desc.pNext = &set_size; + set_size.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT; + set_size.pNext = NULL; + set_size.descriptorSetCount = 1; + set_size.pDescriptorCounts = &variable_binding_size; + } + if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) >= 0) + return vk_descriptor_set; + + allocator->vk_descriptor_pool = VK_NULL_HANDLE; + if (vr == VK_ERROR_FRAGMENTED_POOL || vr == VK_ERROR_OUT_OF_POOL_MEMORY_KHR) + allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); + if (!allocator->vk_descriptor_pool) + { + ERR("Failed to allocate descriptor set, vr %d.\n", vr); + return VK_NULL_HANDLE; + } + + set_desc.descriptorPool = allocator->vk_descriptor_pool; + if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) < 0) + { + FIXME("Failed to allocate descriptor set from a new pool, vr %d.\n", vr); + return VK_NULL_HANDLE; + } + + return vk_descriptor_set; +} + +static void d3d12_command_list_allocator_destroyed(struct d3d12_command_list *list) +{ + TRACE("list %p.\n", list); + + list->allocator = NULL; + list->vk_command_buffer = VK_NULL_HANDLE; +} + +static void vkd3d_buffer_destroy(struct vkd3d_buffer *buffer, struct 
d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + VK_CALL(vkFreeMemory(device->vk_device, buffer->vk_memory, NULL)); + VK_CALL(vkDestroyBuffer(device->vk_device, buffer->vk_buffer, NULL)); +} + +static void d3d12_command_allocator_free_resources(struct d3d12_command_allocator *allocator, + bool keep_reusable_resources) +{ + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + unsigned int i, j; + + allocator->vk_descriptor_pool = VK_NULL_HANDLE; + + if (keep_reusable_resources) + { + if (vkd3d_array_reserve((void **)&allocator->free_descriptor_pools, + &allocator->free_descriptor_pools_size, + allocator->free_descriptor_pool_count + allocator->descriptor_pool_count, + sizeof(*allocator->free_descriptor_pools))) + { + for (i = 0, j = allocator->free_descriptor_pool_count; i < allocator->descriptor_pool_count; ++i, ++j) + { + VK_CALL(vkResetDescriptorPool(device->vk_device, allocator->descriptor_pools[i], 0)); + allocator->free_descriptor_pools[j] = allocator->descriptor_pools[i]; + } + allocator->free_descriptor_pool_count += allocator->descriptor_pool_count; + allocator->descriptor_pool_count = 0; + } + } + else + { + for (i = 0; i < allocator->free_descriptor_pool_count; ++i) + { + VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->free_descriptor_pools[i], NULL)); + } + allocator->free_descriptor_pool_count = 0; + } + + for (i = 0; i < allocator->transfer_buffer_count; ++i) + { + vkd3d_buffer_destroy(&allocator->transfer_buffers[i], device); + } + allocator->transfer_buffer_count = 0; + + for (i = 0; i < allocator->buffer_view_count; ++i) + { + VK_CALL(vkDestroyBufferView(device->vk_device, allocator->buffer_views[i], NULL)); + } + allocator->buffer_view_count = 0; + + for (i = 0; i < allocator->view_count; ++i) + { + vkd3d_view_decref(allocator->views[i], device); + } + allocator->view_count = 0; + + for (i = 0; i < 
allocator->descriptor_pool_count; ++i) + { + VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->descriptor_pools[i], NULL)); + } + allocator->descriptor_pool_count = 0; + + for (i = 0; i < allocator->framebuffer_count; ++i) + { + VK_CALL(vkDestroyFramebuffer(device->vk_device, allocator->framebuffers[i], NULL)); + } + allocator->framebuffer_count = 0; + + for (i = 0; i < allocator->pass_count; ++i) + { + VK_CALL(vkDestroyRenderPass(device->vk_device, allocator->passes[i], NULL)); + } + allocator->pass_count = 0; +} + +/* ID3D12CommandAllocator */ +static inline struct d3d12_command_allocator *impl_from_ID3D12CommandAllocator(ID3D12CommandAllocator *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_command_allocator, ID3D12CommandAllocator_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_QueryInterface(ID3D12CommandAllocator *iface, + REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + if (IsEqualGUID(riid, &IID_ID3D12CommandAllocator) + || IsEqualGUID(riid, &IID_ID3D12Pageable) + || IsEqualGUID(riid, &IID_ID3D12DeviceChild) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { + ID3D12CommandAllocator_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_command_allocator_AddRef(ID3D12CommandAllocator *iface) +{ + struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); + ULONG refcount = InterlockedIncrement(&allocator->refcount); + + TRACE("%p increasing refcount to %u.\n", allocator, refcount); + + return refcount; +} + +static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllocator *iface) +{ + struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); + ULONG refcount = 
InterlockedDecrement(&allocator->refcount); + + TRACE("%p decreasing refcount to %u.\n", allocator, refcount); + + if (!refcount) + { + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + vkd3d_private_store_destroy(&allocator->private_store); + + if (allocator->current_command_list) + d3d12_command_list_allocator_destroyed(allocator->current_command_list); + + d3d12_command_allocator_free_resources(allocator, false); + vkd3d_free(allocator->transfer_buffers); + vkd3d_free(allocator->buffer_views); + vkd3d_free(allocator->views); + vkd3d_free(allocator->descriptor_pools); + vkd3d_free(allocator->free_descriptor_pools); + vkd3d_free(allocator->framebuffers); + vkd3d_free(allocator->passes); + + /* All command buffers are implicitly freed when a pool is destroyed. */ + vkd3d_free(allocator->command_buffers); + VK_CALL(vkDestroyCommandPool(device->vk_device, allocator->vk_command_pool, NULL)); + + vkd3d_free(allocator); + + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_GetPrivateData(ID3D12CommandAllocator *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&allocator->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_SetPrivateData(ID3D12CommandAllocator *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&allocator->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE 
d3d12_command_allocator_SetPrivateDataInterface(ID3D12CommandAllocator *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&allocator->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_SetName(ID3D12CommandAllocator *iface, const WCHAR *name) +{ + struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, allocator->device->wchar_size)); + + return vkd3d_set_vk_object_name(allocator->device, (uint64_t)allocator->vk_command_pool, + VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT, name); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_GetDevice(ID3D12CommandAllocator *iface, REFIID iid, void **device) +{ + struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(allocator->device, iid, device); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllocator *iface) +{ + struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_command_list *list; + struct d3d12_device *device; + VkResult vr; + + TRACE("iface %p.\n", iface); + + if ((list = allocator->current_command_list)) + { + if (list->is_recording) + { + WARN("A command list using this allocator is in the recording state.\n"); + return E_FAIL; + } + + TRACE("Resetting command list %p.\n", list); + } + + device = allocator->device; + vk_procs = &device->vk_procs; + + d3d12_command_allocator_free_resources(allocator, true); + if (allocator->command_buffer_count) + { + VK_CALL(vkFreeCommandBuffers(device->vk_device, 
allocator->vk_command_pool, + allocator->command_buffer_count, allocator->command_buffers)); + allocator->command_buffer_count = 0; + } + + /* The intent here is to recycle memory, so do not use RELEASE_RESOURCES_BIT here. */ + if ((vr = VK_CALL(vkResetCommandPool(device->vk_device, allocator->vk_command_pool, 0)))) + { + WARN("Resetting command pool failed, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static const struct ID3D12CommandAllocatorVtbl d3d12_command_allocator_vtbl = +{ + /* IUnknown methods */ + d3d12_command_allocator_QueryInterface, + d3d12_command_allocator_AddRef, + d3d12_command_allocator_Release, + /* ID3D12Object methods */ + d3d12_command_allocator_GetPrivateData, + d3d12_command_allocator_SetPrivateData, + d3d12_command_allocator_SetPrivateDataInterface, + d3d12_command_allocator_SetName, + /* ID3D12DeviceChild methods */ + d3d12_command_allocator_GetDevice, + /* ID3D12CommandAllocator methods */ + d3d12_command_allocator_Reset, +}; + +static struct d3d12_command_allocator *unsafe_impl_from_ID3D12CommandAllocator(ID3D12CommandAllocator *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_command_allocator_vtbl); + return impl_from_ID3D12CommandAllocator(iface); +} + +struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, + D3D12_COMMAND_LIST_TYPE type) +{ + switch (type) + { + case D3D12_COMMAND_LIST_TYPE_DIRECT: + return device->direct_queue; + case D3D12_COMMAND_LIST_TYPE_COMPUTE: + return device->compute_queue; + case D3D12_COMMAND_LIST_TYPE_COPY: + return device->copy_queue; + default: + FIXME("Unhandled command list type %#x.\n", type); + return NULL; + } +} + +static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allocator, + struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkCommandPoolCreateInfo command_pool_info; + struct vkd3d_queue *queue; + VkResult 
vr; + HRESULT hr; + + if (FAILED(hr = vkd3d_private_store_init(&allocator->private_store))) + return hr; + + if (!(queue = d3d12_device_get_vkd3d_queue(device, type))) + queue = device->direct_queue; + + allocator->ID3D12CommandAllocator_iface.lpVtbl = &d3d12_command_allocator_vtbl; + allocator->refcount = 1; + + allocator->type = type; + allocator->vk_queue_flags = queue->vk_queue_flags; + + command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_info.pNext = NULL; + /* Do not use RESET_COMMAND_BUFFER_BIT. This allows the CommandPool to be a D3D12-style command pool. + * Memory is owned by the pool and CommandBuffers become lightweight handles, + * assuming a half-decent driver implementation. */ + command_pool_info.flags = 0; + command_pool_info.queueFamilyIndex = queue->vk_family_index; + + if ((vr = VK_CALL(vkCreateCommandPool(device->vk_device, &command_pool_info, NULL, + &allocator->vk_command_pool))) < 0) + { + WARN("Failed to create Vulkan command pool, vr %d.\n", vr); + vkd3d_private_store_destroy(&allocator->private_store); + return hresult_from_vk_result(vr); + } + + allocator->vk_descriptor_pool = VK_NULL_HANDLE; + + allocator->free_descriptor_pools = NULL; + allocator->free_descriptor_pools_size = 0; + allocator->free_descriptor_pool_count = 0; + + allocator->passes = NULL; + allocator->passes_size = 0; + allocator->pass_count = 0; + + allocator->framebuffers = NULL; + allocator->framebuffers_size = 0; + allocator->framebuffer_count = 0; + + allocator->descriptor_pools = NULL; + allocator->descriptor_pools_size = 0; + allocator->descriptor_pool_count = 0; + + allocator->views = NULL; + allocator->views_size = 0; + allocator->view_count = 0; + + allocator->buffer_views = NULL; + allocator->buffer_views_size = 0; + allocator->buffer_view_count = 0; + + allocator->transfer_buffers = NULL; + allocator->transfer_buffers_size = 0; + allocator->transfer_buffer_count = 0; + + allocator->command_buffers = NULL; + 
allocator->command_buffers_size = 0; + allocator->command_buffer_count = 0; + + allocator->current_command_list = NULL; + + d3d12_device_add_ref(allocator->device = device); + + return S_OK; +} + +HRESULT d3d12_command_allocator_create(struct d3d12_device *device, + D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_allocator **allocator) +{ + struct d3d12_command_allocator *object; + HRESULT hr; + + if (!(D3D12_COMMAND_LIST_TYPE_DIRECT <= type && type <= D3D12_COMMAND_LIST_TYPE_COPY)) + { + WARN("Invalid type %#x.\n", type); + return E_INVALIDARG; + } + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_command_allocator_init(object, device, type))) + { + vkd3d_free(object); + return hr; + } + + TRACE("Created command allocator %p.\n", object); + + *allocator = object; + + return S_OK; +} + +/* ID3D12CommandList */ +static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList2(ID3D12GraphicsCommandList2 *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); +} + +static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) +{ + list->current_framebuffer = VK_NULL_HANDLE; +} + +static void d3d12_command_list_invalidate_current_pipeline(struct d3d12_command_list *list) +{ + list->current_pipeline = VK_NULL_HANDLE; +} + +static void d3d12_command_list_end_current_render_pass(struct d3d12_command_list *list) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + + if (list->xfb_enabled) + { + VK_CALL(vkCmdEndTransformFeedbackEXT(list->vk_command_buffer, 0, ARRAY_SIZE(list->so_counter_buffers), + list->so_counter_buffers, list->so_counter_buffer_offsets)); + } + + if (list->current_render_pass) + VK_CALL(vkCmdEndRenderPass(list->vk_command_buffer)); + + list->current_render_pass = VK_NULL_HANDLE; + + if (list->xfb_enabled) + { + VkMemoryBarrier vk_barrier; + + /* We need a barrier between pause and 
resume. */ + vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + vk_barrier.pNext = NULL; + vk_barrier.srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; + vk_barrier.dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT; + VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, + VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0, + 1, &vk_barrier, 0, NULL, 0, NULL)); + + list->xfb_enabled = false; + } +} + +static void d3d12_command_list_invalidate_current_render_pass(struct d3d12_command_list *list) +{ + d3d12_command_list_end_current_render_pass(list); +} + +static void d3d12_command_list_invalidate_bindings(struct d3d12_command_list *list, + struct d3d12_pipeline_state *state) +{ + if (state && state->uav_counters.binding_count) + { + enum vkd3d_pipeline_bind_point bind_point = (enum vkd3d_pipeline_bind_point)state->vk_bind_point; + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + + vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size, + state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views)); + bindings->uav_counters_dirty = true; + } +} + +static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + + if (!bindings->root_signature) + return; + + bindings->descriptor_set_count = 0; + bindings->descriptor_table_dirty_mask = bindings->descriptor_table_active_mask & bindings->root_signature->descriptor_table_mask; + bindings->push_descriptor_dirty_mask = bindings->push_descriptor_active_mask & bindings->root_signature->push_descriptor_mask; +} + +static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, + const struct d3d12_resource *resource, VkQueueFlags vk_queue_flags, const struct 
vkd3d_vulkan_info *vk_info, + VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout) +{ + bool is_swapchain_image = resource && (resource->flags & VKD3D_RESOURCE_PRESENT_STATE_TRANSITION); + VkPipelineStageFlags queue_shader_stages = 0; + + if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) + { + queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT + | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT + | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT + | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT + | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) + queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + + switch (state) + { + case D3D12_RESOURCE_STATE_COMMON: /* D3D12_RESOURCE_STATE_PRESENT */ + /* The COMMON state is used for ownership transfer between + * DIRECT/COMPUTE and COPY queues. Additionally, a texture has to + * be in the COMMON state to be accessed by CPU. Moreover, + * resources can be implicitly promoted to other states out of the + * COMMON state, and the resource state can decay to the COMMON + * state when GPU finishes execution of a command list. */ + if (is_swapchain_image) + { + if (resource->present_state == D3D12_RESOURCE_STATE_PRESENT) + { + *access_mask = VK_ACCESS_MEMORY_READ_BIT; + *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + return true; + } + else if (resource->present_state != D3D12_RESOURCE_STATE_COMMON) + { + vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, + resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); + return true; + } + } + + *access_mask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; + *stage_flags = VK_PIPELINE_STAGE_HOST_BIT; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_GENERAL; + return true; + + /* Handle write states. 
*/ + case D3D12_RESOURCE_STATE_RENDER_TARGET: + *access_mask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT + | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + return true; + + case D3D12_RESOURCE_STATE_UNORDERED_ACCESS: + *access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + *stage_flags = queue_shader_stages; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_GENERAL; + return true; + + case D3D12_RESOURCE_STATE_DEPTH_WRITE: + *access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT + | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + *stage_flags = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT + | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + if (image_layout) + { + if (!stencil_state || (stencil_state & D3D12_RESOURCE_STATE_DEPTH_WRITE)) + *image_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + else + *image_layout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL; + } + return true; + + case D3D12_RESOURCE_STATE_COPY_DEST: + case D3D12_RESOURCE_STATE_RESOLVE_DEST: + *access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + *stage_flags = VK_PIPELINE_STAGE_TRANSFER_BIT; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + return true; + + case D3D12_RESOURCE_STATE_STREAM_OUT: + *access_mask = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT + | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT + | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; + *stage_flags = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT + | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_UNDEFINED; + return true; + + /* Set the Vulkan image layout for read-only states. 
*/ + case D3D12_RESOURCE_STATE_DEPTH_READ: + case D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE: + case D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE: + case D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE: + *access_mask = 0; + *stage_flags = 0; + if (image_layout) + { + if (stencil_state & D3D12_RESOURCE_STATE_DEPTH_WRITE) + { + *image_layout = VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL; + *access_mask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + else + { + *image_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + } + } + break; + + case D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE: + case D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE: + case D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE: + *access_mask = 0; + *stage_flags = 0; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + + case D3D12_RESOURCE_STATE_COPY_SOURCE: + case D3D12_RESOURCE_STATE_RESOLVE_SOURCE: + *access_mask = 0; + *stage_flags = 0; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + break; + + default: + *access_mask = 0; + *stage_flags = 0; + if (image_layout) + *image_layout = VK_IMAGE_LAYOUT_GENERAL; + break; + } + + /* Handle read-only states. 
*/ + assert(!is_write_resource_state(state)); + + if (state & D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER) + { + *access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | VK_ACCESS_UNIFORM_READ_BIT; + *stage_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT + | queue_shader_stages; + state &= ~D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + + if (state & D3D12_RESOURCE_STATE_INDEX_BUFFER) + { + *access_mask |= VK_ACCESS_INDEX_READ_BIT; + *stage_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + state &= ~D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + + if (state & D3D12_RESOURCE_STATE_DEPTH_READ) + { + *access_mask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + *stage_flags |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT + | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + state &= ~D3D12_RESOURCE_STATE_DEPTH_READ; + } + + if (state & D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) + { + *access_mask |= VK_ACCESS_SHADER_READ_BIT; + *stage_flags |= (queue_shader_stages & ~VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + state &= ~D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if (state & D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + { + *access_mask |= VK_ACCESS_SHADER_READ_BIT; + *stage_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + state &= ~D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + } + + if (state & D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT) /* D3D12_RESOURCE_STATE_PREDICATION */ + { + *access_mask |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + *stage_flags |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + if (vk_info->EXT_conditional_rendering) + { + *access_mask |= VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT; + *stage_flags |= VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT; + } + state &= ~D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + } + + if (state & (D3D12_RESOURCE_STATE_COPY_SOURCE | D3D12_RESOURCE_STATE_RESOLVE_SOURCE)) + { + *access_mask |= VK_ACCESS_TRANSFER_READ_BIT; + *stage_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + state &= ~(D3D12_RESOURCE_STATE_COPY_SOURCE | 
D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + } + + if (state) + { + WARN("Invalid resource state %#x.\n", state); + return false; + } + return true; +} + +static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12_command_list *list, + struct d3d12_resource *resource) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; + VkPipelineStageFlags src_stage_mask, dst_stage_mask; + VkImageMemoryBarrier barrier; + + assert(d3d12_resource_is_texture(resource)); + + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = NULL; + + /* vkQueueSubmit() defines a memory dependency with prior host writes. */ + src_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + barrier.srcAccessMask = 0; + barrier.oldLayout = d3d12_resource_is_cpu_accessible(resource) ? + VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; + + if (!vk_barrier_parameters_from_d3d12_resource_state(resource->initial_state, 0, + resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, &dst_stage_mask, &barrier.newLayout)) + { + FIXME("Unhandled state %#x.\n", resource->initial_state); + return; + } + + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = resource->u.vk_image; + barrier.subresourceRange.aspectMask = resource->format->vk_aspect_mask; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + + TRACE("Initial state %#x transition for resource %p (old layout %#x, new layout %#x).\n", + resource->initial_state, resource, barrier.oldLayout, barrier.newLayout); + + VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, src_stage_mask, dst_stage_mask, 0, + 0, NULL, 0, NULL, 1, &barrier)); +} + +static void 
d3d12_command_list_track_resource_usage(struct d3d12_command_list *list, + struct d3d12_resource *resource) +{ + if (resource->flags & VKD3D_RESOURCE_INITIAL_STATE_TRANSITION) + { + d3d12_command_list_end_current_render_pass(list); + + d3d12_command_list_transition_resource_to_initial_state(list, resource); + resource->flags &= ~VKD3D_RESOURCE_INITIAL_STATE_TRANSITION; + } +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList2 *iface, + REFIID iid, void **object) +{ + TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); + + if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList) + || IsEqualGUID(iid, &IID_ID3D12CommandList) + || IsEqualGUID(iid, &IID_ID3D12DeviceChild) + || IsEqualGUID(iid, &IID_ID3D12Object) + || IsEqualGUID(iid, &IID_IUnknown)) + { + ID3D12GraphicsCommandList2_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList2 *iface) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + ULONG refcount = InterlockedIncrement(&list->refcount); + + TRACE("%p increasing refcount to %u.\n", list, refcount); + + return refcount; +} + +static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bindings) +{ + vkd3d_free(bindings->vk_uav_counter_views); +} + +static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList2 *iface) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + ULONG refcount = InterlockedDecrement(&list->refcount); + + TRACE("%p decreasing refcount to %u.\n", list, refcount); + + if (!refcount) + { + struct d3d12_device *device = list->device; + + 
vkd3d_private_store_destroy(&list->private_store); + + /* When command pool is destroyed, all command buffers are implicitly freed. */ + if (list->allocator) + d3d12_command_allocator_free_command_buffer(list->allocator, list); + + vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE]); + vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS]); + + vkd3d_free(list); + + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList2 *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&list->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList2 *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&list->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList2 *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&list->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList2 *iface, const WCHAR *name) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, name %s.\n", iface, 
debugstr_w(name, list->device->wchar_size)); + + return name ? S_OK : E_INVALIDARG; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList2 *iface, REFIID iid, void **device) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(list->device, iid, device); +} + +static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList2 *iface) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p.\n", iface); + + return list->type; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList2 *iface) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_vk_device_procs *vk_procs; + VkResult vr; + + TRACE("iface %p.\n", iface); + + if (!list->is_recording) + { + WARN("Command list is not in the recording state.\n"); + return E_FAIL; + } + + vk_procs = &list->device->vk_procs; + + d3d12_command_list_end_current_render_pass(list); + if (list->is_predicated) + VK_CALL(vkCmdEndConditionalRenderingEXT(list->vk_command_buffer)); + + if ((vr = VK_CALL(vkEndCommandBuffer(list->vk_command_buffer))) < 0) + { + WARN("Failed to end command buffer, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + if (list->allocator) + { + d3d12_command_allocator_free_command_buffer(list->allocator, list); + list->allocator = NULL; + } + + list->is_recording = false; + + if (!list->is_valid) + { + WARN("Error occurred during command list recording.\n"); + return E_INVALIDARG; + } + + return S_OK; +} + +static void d3d12_command_list_reset_state(struct d3d12_command_list *list, + ID3D12PipelineState *initial_pipeline_state) +{ + ID3D12GraphicsCommandList2 *iface = &list->ID3D12GraphicsCommandList2_iface; + + memset(list->strides, 0, 
sizeof(list->strides)); + list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + + list->index_buffer_format = DXGI_FORMAT_UNKNOWN; + + memset(list->rtvs, 0, sizeof(list->rtvs)); + list->dsv = VK_NULL_HANDLE; + list->dsv_format = VK_FORMAT_UNDEFINED; + list->fb_width = 0; + list->fb_height = 0; + list->fb_layer_count = 0; + + list->xfb_enabled = false; + + list->is_predicated = false; + + list->current_framebuffer = VK_NULL_HANDLE; + list->current_pipeline = VK_NULL_HANDLE; + list->pso_render_pass = VK_NULL_HANDLE; + list->current_render_pass = VK_NULL_HANDLE; + + vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE]); + vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS]); + memset(list->pipeline_bindings, 0, sizeof(list->pipeline_bindings)); + list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS].vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; + list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE].vk_bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + + list->state = NULL; + + memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); + memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets)); + + ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList2 *iface, + ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) +{ + struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + HRESULT hr; + + TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", + iface, allocator, initial_pipeline_state); + + if (!allocator_impl) + { + WARN("Command allocator is NULL.\n"); + return E_INVALIDARG; + } + + if (list->is_recording) + { + WARN("Command list is in the recording 
state.\n"); + return E_FAIL; + } + + if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator_impl, list))) + { + list->allocator = allocator_impl; + d3d12_command_list_reset_state(list, initial_pipeline_state); + } + + return hr; +} + +static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList2 *iface, + ID3D12PipelineState *pipeline_state) +{ + FIXME("iface %p, pipline_state %p stub!\n", iface, pipeline_state); +} + +static bool d3d12_command_list_has_depth_stencil_view(struct d3d12_command_list *list) +{ + struct d3d12_graphics_pipeline_state *graphics; + + assert(d3d12_pipeline_state_is_graphics(list->state)); + graphics = &list->state->u.graphics; + + return graphics->dsv_format || (d3d12_pipeline_state_has_unknown_dsv_format(list->state) && list->dsv_format); +} + +static void d3d12_command_list_get_fb_extent(struct d3d12_command_list *list, + uint32_t *width, uint32_t *height, uint32_t *layer_count) +{ + struct d3d12_graphics_pipeline_state *graphics = &list->state->u.graphics; + struct d3d12_device *device = list->device; + + if (graphics->rt_count || d3d12_command_list_has_depth_stencil_view(list)) + { + *width = list->fb_width; + *height = list->fb_height; + if (layer_count) + *layer_count = list->fb_layer_count; + } + else + { + *width = device->vk_info.device_limits.maxFramebufferWidth; + *height = device->vk_info.device_limits.maxFramebufferHeight; + if (layer_count) + *layer_count = 1; + } +} + +static bool d3d12_command_list_update_current_framebuffer(struct d3d12_command_list *list) +{ + struct d3d12_device *device = list->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkImageView views[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 1]; + struct d3d12_graphics_pipeline_state *graphics; + struct VkFramebufferCreateInfo fb_desc; + VkFramebuffer vk_framebuffer; + unsigned int view_count; + unsigned int i; + VkResult vr; + + if (list->current_framebuffer != VK_NULL_HANDLE) + return 
true; + + graphics = &list->state->u.graphics; + + for (i = 0, view_count = 0; i < graphics->rt_count; ++i) + { + if (graphics->null_attachment_mask & (1u << i)) + { + if (list->rtvs[i]) + WARN("Expected NULL RTV for attachment %u.\n", i); + continue; + } + + if (!list->rtvs[i]) + { + FIXME("Invalid RTV for attachment %u.\n", i); + return false; + } + + views[view_count++] = list->rtvs[i]; + } + + if (d3d12_command_list_has_depth_stencil_view(list)) + { + if (!(views[view_count++] = list->dsv)) + { + FIXME("Invalid DSV.\n"); + return false; + } + } + + fb_desc.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fb_desc.pNext = NULL; + fb_desc.flags = 0; + fb_desc.renderPass = list->pso_render_pass; + fb_desc.attachmentCount = view_count; + fb_desc.pAttachments = views; + d3d12_command_list_get_fb_extent(list, &fb_desc.width, &fb_desc.height, &fb_desc.layers); + if ((vr = VK_CALL(vkCreateFramebuffer(device->vk_device, &fb_desc, NULL, &vk_framebuffer))) < 0) + { + WARN("Failed to create Vulkan framebuffer, vr %d.\n", vr); + return false; + } + + if (!d3d12_command_allocator_add_framebuffer(list->allocator, vk_framebuffer)) + { + WARN("Failed to add framebuffer.\n"); + VK_CALL(vkDestroyFramebuffer(device->vk_device, vk_framebuffer, NULL)); + return false; + } + + list->current_framebuffer = vk_framebuffer; + + return true; +} + +static bool d3d12_command_list_update_compute_pipeline(struct d3d12_command_list *list) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + + if (list->current_pipeline != VK_NULL_HANDLE) + return true; + + if (!d3d12_pipeline_state_is_compute(list->state)) + { + WARN("Pipeline state %p is not a compute pipeline.\n", list->state); + return false; + } + + VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, list->state->u.compute.vk_pipeline)); + list->current_pipeline = list->state->u.compute.vk_pipeline; + + return true; +} + +static bool d3d12_command_list_update_graphics_pipeline(struct 
d3d12_command_list *list) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + VkRenderPass vk_render_pass; + VkPipeline vk_pipeline; + + if (list->current_pipeline != VK_NULL_HANDLE) + return true; + + if (!d3d12_pipeline_state_is_graphics(list->state)) + { + WARN("Pipeline state %p is not a graphics pipeline.\n", list->state); + return false; + } + + if (!(vk_pipeline = d3d12_pipeline_state_get_or_create_pipeline(list->state, + list->primitive_topology, list->strides, list->dsv_format, &vk_render_pass))) + return false; + + /* The render pass cache ensures that we use the same Vulkan render pass + * object for compatible render passes. */ + if (list->pso_render_pass != vk_render_pass) + { + list->pso_render_pass = vk_render_pass; + d3d12_command_list_invalidate_current_framebuffer(list); + d3d12_command_list_invalidate_current_render_pass(list); + } + + VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, vk_pipeline)); + list->current_pipeline = vk_pipeline; + + return true; +} + +static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + unsigned int variable_binding_size, unbounded_offset, table_index, heap_size, i; + const struct d3d12_root_signature *root_signature = bindings->root_signature; + const struct d3d12_descriptor_set_layout *layout; + struct d3d12_device *device = list->device; + const struct d3d12_desc *base_descriptor; + VkDescriptorSet vk_descriptor_set; + + if (bindings->descriptor_set_count && !bindings->in_use) + return; + + /* We cannot modify bound descriptor sets. We need a new descriptor set if + * we are about to update resource bindings. 
+ * + * The Vulkan spec says: + * + * "The descriptor set contents bound by a call to + * vkCmdBindDescriptorSets may be consumed during host execution of the + * command, or during shader execution of the resulting draws, or any + * time in between. Thus, the contents must not be altered (overwritten + * by an update command, or freed) between when the command is recorded + * and when the command completes executing on the queue." + */ + bindings->descriptor_set_count = 0; + for (i = root_signature->main_set; i < root_signature->vk_set_count; ++i) + { + layout = &root_signature->descriptor_set_layouts[i]; + unbounded_offset = layout->unbounded_offset; + table_index = layout->table_index; + variable_binding_size = 0; + + if (unbounded_offset != UINT_MAX + /* Descriptors may not be set, eg. WoW. */ + && (base_descriptor = bindings->descriptor_tables[table_index])) + { + heap_size = vkd3d_gpu_descriptor_allocator_range_size_from_descriptor( + &device->gpu_descriptor_allocator, base_descriptor); + + if (heap_size < unbounded_offset) + WARN("Descriptor heap size %u is less than the offset %u of an unbounded range in table %u, " + "vk set %u.\n", heap_size, unbounded_offset, table_index, i); + else + variable_binding_size = heap_size - unbounded_offset; + } + + vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, + layout->vk_layout, variable_binding_size, unbounded_offset != UINT_MAX); + bindings->descriptor_sets[bindings->descriptor_set_count++] = vk_descriptor_set; + } + + bindings->in_use = false; + + bindings->descriptor_table_dirty_mask |= bindings->descriptor_table_active_mask & root_signature->descriptor_table_mask; + bindings->push_descriptor_dirty_mask |= bindings->push_descriptor_active_mask & root_signature->push_descriptor_mask; +} + +static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_descriptor_write, + VkDescriptorImageInfo *vk_image_info, const struct d3d12_desc *descriptor, + const struct 
d3d12_root_descriptor_table_range *range, VkDescriptorSet *vk_descriptor_sets, + unsigned int index, bool use_array) +{ + uint32_t descriptor_range_magic = range->descriptor_magic; + const struct vkd3d_view *view = descriptor->u.view; + uint32_t vk_binding = range->binding; + uint32_t set = range->set; + + if (descriptor->magic != descriptor_range_magic) + return false; + + vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_write->pNext = NULL; + vk_descriptor_write->dstSet = vk_descriptor_sets[set]; + vk_descriptor_write->dstBinding = use_array ? vk_binding : vk_binding + index; + vk_descriptor_write->dstArrayElement = use_array ? index : 0; + vk_descriptor_write->descriptorCount = 1; + vk_descriptor_write->descriptorType = descriptor->vk_descriptor_type; + vk_descriptor_write->pImageInfo = NULL; + vk_descriptor_write->pBufferInfo = NULL; + vk_descriptor_write->pTexelBufferView = NULL; + + switch (descriptor->magic) + { + case VKD3D_DESCRIPTOR_MAGIC_CBV: + vk_descriptor_write->pBufferInfo = &descriptor->u.vk_cbv_info; + break; + + case VKD3D_DESCRIPTOR_MAGIC_SRV: + case VKD3D_DESCRIPTOR_MAGIC_UAV: + /* We use separate bindings for buffer and texture SRVs/UAVs. + * See d3d12_root_signature_init(). For unbounded ranges the + * descriptors exist in two consecutive sets, otherwise they occur + * in pairs in one set. 
*/ + if (range->descriptor_count == UINT_MAX) + { + if (descriptor->vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && descriptor->vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + { + vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; + vk_descriptor_write->dstBinding = 0; + } + } + else + { + if (!use_array) + vk_descriptor_write->dstBinding = vk_binding + 2 * index; + if (descriptor->vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && descriptor->vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + ++vk_descriptor_write->dstBinding; + } + + if (descriptor->vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + || descriptor->vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + { + vk_descriptor_write->pTexelBufferView = &view->u.vk_buffer_view; + } + else + { + vk_image_info->sampler = VK_NULL_HANDLE; + vk_image_info->imageView = view->u.vk_image_view; + vk_image_info->imageLayout = descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_SRV + ? 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; + + vk_descriptor_write->pImageInfo = vk_image_info; + } + break; + + case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: + vk_image_info->sampler = view->u.vk_sampler; + vk_image_info->imageView = VK_NULL_HANDLE; + vk_image_info->imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + vk_descriptor_write->pImageInfo = vk_image_info; + break; + + default: + ERR("Invalid descriptor %#x.\n", descriptor->magic); + return false; + } + + return true; +} + +static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, unsigned int index, struct d3d12_desc *base_descriptor) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + struct VkWriteDescriptorSet descriptor_writes[24], *current_descriptor_write; + const struct d3d12_root_signature *root_signature = bindings->root_signature; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + struct VkDescriptorImageInfo image_infos[24], *current_image_info; + const struct d3d12_root_descriptor_table *descriptor_table; + const struct d3d12_pipeline_state *state = list->state; + const struct d3d12_root_descriptor_table_range *range; + VkDevice vk_device = list->device->vk_device; + unsigned int i, j, k, descriptor_count; + struct d3d12_desc *descriptor; + unsigned int write_count = 0; + bool unbounded = false; + + descriptor_table = root_signature_get_descriptor_table(root_signature, index); + + current_descriptor_write = descriptor_writes; + current_image_info = image_infos; + for (i = 0; i < descriptor_table->range_count; ++i) + { + range = &descriptor_table->ranges[i]; + + /* The first unbounded range of each type is written until the heap end is reached. Do not repeat. 
*/ + if (unbounded && i && range->type == descriptor_table->ranges[i - 1].type) + continue; + + descriptor = base_descriptor + range->offset; + + descriptor_count = range->descriptor_count; + if ((unbounded = descriptor_count == UINT_MAX)) + { + descriptor_count = vkd3d_gpu_descriptor_allocator_range_size_from_descriptor( + &list->device->gpu_descriptor_allocator, descriptor); + + if (descriptor_count > range->vk_binding_count) + { + ERR("Heap descriptor count %u exceeds maximum Vulkan count %u. Reducing to the Vulkan maximum.\n", + descriptor_count, range->vk_binding_count); + descriptor_count = range->vk_binding_count; + } + } + + for (j = 0; j < descriptor_count; ++j, ++descriptor) + { + unsigned int register_idx = range->base_register_idx + j; + + /* Track UAV counters. */ + if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV) + { + for (k = 0; k < state->uav_counters.binding_count; ++k) + { + if (state->uav_counters.bindings[k].register_space == range->register_space + && state->uav_counters.bindings[k].register_index == register_idx) + { + VkBufferView vk_counter_view = descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_UAV + ? descriptor->u.view->vk_counter_view : VK_NULL_HANDLE; + if (bindings->vk_uav_counter_views[k] != vk_counter_view) + bindings->uav_counters_dirty = true; + bindings->vk_uav_counter_views[k] = vk_counter_view; + break; + } + } + } + + /* Not all descriptors are necessarily populated if the range is unbounded. 
*/ + if (descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_FREE) + continue; + + if (!vk_write_descriptor_set_from_d3d12_desc(current_descriptor_write, current_image_info, + descriptor, range, bindings->descriptor_sets, j, root_signature->use_descriptor_arrays)) + continue; + + ++write_count; + ++current_descriptor_write; + ++current_image_info; + + if (write_count == ARRAY_SIZE(descriptor_writes)) + { + VK_CALL(vkUpdateDescriptorSets(vk_device, write_count, descriptor_writes, 0, NULL)); + write_count = 0; + current_descriptor_write = descriptor_writes; + current_image_info = image_infos; + } + } + } + + VK_CALL(vkUpdateDescriptorSets(vk_device, write_count, descriptor_writes, 0, NULL)); +} + +static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *vk_descriptor_write, + const struct d3d12_root_parameter *root_parameter, VkDescriptorSet vk_descriptor_set, + VkBufferView *vk_buffer_view, const VkDescriptorBufferInfo *vk_buffer_info) +{ + const struct d3d12_root_descriptor *root_descriptor; + + switch (root_parameter->parameter_type) + { + case D3D12_ROOT_PARAMETER_TYPE_CBV: + vk_descriptor_write->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + break; + case D3D12_ROOT_PARAMETER_TYPE_SRV: + vk_descriptor_write->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + break; + case D3D12_ROOT_PARAMETER_TYPE_UAV: + vk_descriptor_write->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + break; + default: + ERR("Invalid root descriptor %#x.\n", root_parameter->parameter_type); + return false; + } + + root_descriptor = &root_parameter->u.descriptor; + + vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_write->pNext = NULL; + vk_descriptor_write->dstSet = vk_descriptor_set; + vk_descriptor_write->dstBinding = root_descriptor->binding; + vk_descriptor_write->dstArrayElement = 0; + vk_descriptor_write->descriptorCount = 1; + vk_descriptor_write->pImageInfo = NULL; + vk_descriptor_write->pBufferInfo = 
vk_buffer_info; + vk_descriptor_write->pTexelBufferView = vk_buffer_view; + + return true; +} + +static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct d3d12_root_signature *root_signature = bindings->root_signature; + VkWriteDescriptorSet *descriptor_writes = NULL, *current_descriptor_write; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + VkDescriptorBufferInfo *buffer_infos = NULL, *current_buffer_info; + const struct d3d12_root_parameter *root_parameter; + struct vkd3d_push_descriptor *push_descriptor; + struct d3d12_device *device = list->device; + VkDescriptorBufferInfo *vk_buffer_info; + unsigned int i, descriptor_count; + VkBufferView *vk_buffer_view; + + if (!bindings->push_descriptor_dirty_mask) + return; + + descriptor_count = vkd3d_popcount(bindings->push_descriptor_dirty_mask); + + if (!(descriptor_writes = vkd3d_calloc(descriptor_count, sizeof(*descriptor_writes)))) + return; + if (!(buffer_infos = vkd3d_calloc(descriptor_count, sizeof(*buffer_infos)))) + goto done; + + descriptor_count = 0; + current_buffer_info = buffer_infos; + current_descriptor_write = descriptor_writes; + for (i = 0; i < ARRAY_SIZE(bindings->push_descriptors); ++i) + { + if (!(bindings->push_descriptor_dirty_mask & (1u << i))) + continue; + + root_parameter = root_signature_get_root_descriptor(root_signature, i); + push_descriptor = &bindings->push_descriptors[i]; + + if (root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV) + { + vk_buffer_view = NULL; + vk_buffer_info = current_buffer_info; + vk_buffer_info->buffer = push_descriptor->u.cbv.vk_buffer; + vk_buffer_info->offset = push_descriptor->u.cbv.offset; + vk_buffer_info->range = VK_WHOLE_SIZE; + } + else + { + vk_buffer_view = &push_descriptor->u.vk_buffer_view; + vk_buffer_info = NULL; + } + + if 
(!vk_write_descriptor_set_from_root_descriptor(current_descriptor_write, + root_parameter, bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info)) + continue; + + ++descriptor_count; + ++current_descriptor_write; + ++current_buffer_info; + } + + VK_CALL(vkUpdateDescriptorSets(device->vk_device, descriptor_count, descriptor_writes, 0, NULL)); + bindings->push_descriptor_dirty_mask = 0; + +done: + vkd3d_free(descriptor_writes); + vkd3d_free(buffer_infos); +} + +static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_pipeline_state *state = list->state; + VkDevice vk_device = list->device->vk_device; + VkWriteDescriptorSet *vk_descriptor_writes; + VkDescriptorSet vk_descriptor_set; + unsigned int uav_counter_count; + unsigned int i; + + if (!state || !bindings->uav_counters_dirty) + return; + + uav_counter_count = state->uav_counters.binding_count; + if (!(vk_descriptor_writes = vkd3d_calloc(uav_counter_count, sizeof(*vk_descriptor_writes)))) + return; + if (!(vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set( + list->allocator, state->uav_counters.vk_set_layout, 0, false))) + goto done; + + for (i = 0; i < uav_counter_count; ++i) + { + const struct vkd3d_shader_uav_counter_binding *uav_counter = &state->uav_counters.bindings[i]; + const VkBufferView *vk_uav_counter_views = bindings->vk_uav_counter_views; + + assert(vk_uav_counter_views[i]); + + vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_writes[i].pNext = NULL; + vk_descriptor_writes[i].dstSet = vk_descriptor_set; + vk_descriptor_writes[i].dstBinding = uav_counter->binding.binding; + vk_descriptor_writes[i].dstArrayElement = 0; + vk_descriptor_writes[i].descriptorCount = 1; + 
vk_descriptor_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + vk_descriptor_writes[i].pImageInfo = NULL; + vk_descriptor_writes[i].pBufferInfo = NULL; + vk_descriptor_writes[i].pTexelBufferView = &vk_uav_counter_views[i]; + } + + VK_CALL(vkUpdateDescriptorSets(vk_device, uav_counter_count, vk_descriptor_writes, 0, NULL)); + + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, + state->uav_counters.vk_pipeline_layout, state->uav_counters.set_index, 1, &vk_descriptor_set, 0, NULL)); + + bindings->uav_counters_dirty = false; + +done: + vkd3d_free(vk_descriptor_writes); +} + +static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + struct d3d12_desc *base_descriptor; + unsigned int i; + + if (!rs || !rs->vk_set_count) + return; + + if (bindings->descriptor_table_dirty_mask || bindings->push_descriptor_dirty_mask) + d3d12_command_list_prepare_descriptors(list, bind_point); + + for (i = 0; i < ARRAY_SIZE(bindings->descriptor_tables); ++i) + { + if (bindings->descriptor_table_dirty_mask & ((uint64_t)1 << i)) + { + if ((base_descriptor = bindings->descriptor_tables[i])) + d3d12_command_list_update_descriptor_table(list, bind_point, i, base_descriptor); + else + WARN("Descriptor table %u is not set.\n", i); + } + } + bindings->descriptor_table_dirty_mask = 0; + + d3d12_command_list_update_push_descriptors(list, bind_point); + + if (bindings->descriptor_set_count) + { + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, + rs->vk_pipeline_layout, rs->main_set, bindings->descriptor_set_count, bindings->descriptor_sets, + 0, NULL)); + bindings->in_use = true; + } + + 
d3d12_command_list_update_uav_counter_descriptors(list, bind_point); +} + +static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) +{ + d3d12_command_list_end_current_render_pass(list); + + if (!d3d12_command_list_update_compute_pipeline(list)) + return false; + + d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + + return true; +} + +static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + struct d3d12_graphics_pipeline_state *graphics; + struct VkRenderPassBeginInfo begin_desc; + VkRenderPass vk_render_pass; + + if (!d3d12_command_list_update_graphics_pipeline(list)) + return false; + if (!d3d12_command_list_update_current_framebuffer(list)) + return false; + + d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); + + if (list->current_render_pass != VK_NULL_HANDLE) + return true; + + vk_render_pass = list->pso_render_pass; + assert(vk_render_pass); + + begin_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + begin_desc.pNext = NULL; + begin_desc.renderPass = vk_render_pass; + begin_desc.framebuffer = list->current_framebuffer; + begin_desc.renderArea.offset.x = 0; + begin_desc.renderArea.offset.y = 0; + d3d12_command_list_get_fb_extent(list, + &begin_desc.renderArea.extent.width, &begin_desc.renderArea.extent.height, NULL); + begin_desc.clearValueCount = 0; + begin_desc.pClearValues = NULL; + VK_CALL(vkCmdBeginRenderPass(list->vk_command_buffer, &begin_desc, VK_SUBPASS_CONTENTS_INLINE)); + + list->current_render_pass = vk_render_pass; + + graphics = &list->state->u.graphics; + if (graphics->xfb_enabled) + { + VK_CALL(vkCmdBeginTransformFeedbackEXT(list->vk_command_buffer, 0, ARRAY_SIZE(list->so_counter_buffers), + list->so_counter_buffers, list->so_counter_buffer_offsets)); + + list->xfb_enabled = true; + } + + return true; +} + +static void 
d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_command_list *list) +{ + struct d3d12_graphics_pipeline_state *graphics = &list->state->u.graphics; + + /* In Vulkan, the strip cut value is derived from the index buffer format. */ + switch (graphics->index_buffer_strip_cut_value) + { + case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF: + if (list->index_buffer_format != DXGI_FORMAT_R16_UINT) + { + FIXME_ONCE("Strip cut value 0xffff is not supported with index buffer format %#x.\n", + list->index_buffer_format); + } + break; + + case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF: + if (list->index_buffer_format != DXGI_FORMAT_R32_UINT) + { + FIXME_ONCE("Strip cut value 0xffffffff is not supported with index buffer format %#x.\n", + list->index_buffer_format); + } + break; + + default: + break; + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList2 *iface, + UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, + UINT start_instance_location) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " + "start_vertex_location %u, start_instance_location %u.\n", + iface, vertex_count_per_instance, instance_count, + start_vertex_location, start_instance_location); + + vk_procs = &list->device->vk_procs; + + if (!d3d12_command_list_begin_render_pass(list)) + { + WARN("Failed to begin render pass, ignoring draw call.\n"); + return; + } + + VK_CALL(vkCmdDraw(list->vk_command_buffer, vertex_count_per_instance, + instance_count, start_vertex_location, start_instance_location)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList2 *iface, + UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, + INT base_vertex_location, UINT start_instance_location) +{ + struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " + "base_vertex_location %d, start_instance_location %u.\n", + iface, index_count_per_instance, instance_count, start_vertex_location, + base_vertex_location, start_instance_location); + + if (!d3d12_command_list_begin_render_pass(list)) + { + WARN("Failed to begin render pass, ignoring draw call.\n"); + return; + } + + vk_procs = &list->device->vk_procs; + + d3d12_command_list_check_index_buffer_strip_cut_value(list); + + VK_CALL(vkCmdDrawIndexed(list->vk_command_buffer, index_count_per_instance, + instance_count, start_vertex_location, base_vertex_location, start_instance_location)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList2 *iface, + UINT x, UINT y, UINT z) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); + + if (!d3d12_command_list_update_compute_state(list)) + { + WARN("Failed to update compute state, ignoring dispatch.\n"); + return; + } + + vk_procs = &list->device->vk_procs; + + VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList2 *iface, + ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_vk_device_procs *vk_procs; + VkBufferCopy buffer_copy; + + TRACE("iface %p, dst_resource %p, dst_offset %#"PRIx64", src_resource %p, " + "src_offset %#"PRIx64", byte_count %#"PRIx64".\n", + iface, dst, dst_offset, src, src_offset, byte_count); + + vk_procs = 
&list->device->vk_procs; + + dst_resource = unsafe_impl_from_ID3D12Resource(dst); + assert(d3d12_resource_is_buffer(dst_resource)); + src_resource = unsafe_impl_from_ID3D12Resource(src); + assert(d3d12_resource_is_buffer(src_resource)); + + d3d12_command_list_track_resource_usage(list, dst_resource); + d3d12_command_list_track_resource_usage(list, src_resource); + + d3d12_command_list_end_current_render_pass(list); + + buffer_copy.srcOffset = src_offset; + buffer_copy.dstOffset = dst_offset; + buffer_copy.size = byte_count; + + VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer, + src_resource->u.vk_buffer, dst_resource->u.vk_buffer, 1, &buffer_copy)); +} + +static void vk_image_subresource_layers_from_d3d12(VkImageSubresourceLayers *subresource, + const struct vkd3d_format *format, unsigned int sub_resource_idx, unsigned int miplevel_count) +{ + subresource->aspectMask = format->vk_aspect_mask; + subresource->mipLevel = sub_resource_idx % miplevel_count; + subresource->baseArrayLayer = sub_resource_idx / miplevel_count; + subresource->layerCount = 1; +} + +static void vk_extent_3d_from_d3d12_miplevel(VkExtent3D *extent, + const D3D12_RESOURCE_DESC *resource_desc, unsigned int miplevel_idx) +{ + extent->width = d3d12_resource_desc_get_width(resource_desc, miplevel_idx); + extent->height = d3d12_resource_desc_get_height(resource_desc, miplevel_idx); + extent->depth = d3d12_resource_desc_get_depth(resource_desc, miplevel_idx); +} + +static void vk_buffer_image_copy_from_d3d12(VkBufferImageCopy *copy, + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT *footprint, unsigned int sub_resource_idx, + const D3D12_RESOURCE_DESC *image_desc, const struct vkd3d_format *format, + const D3D12_BOX *src_box, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z) +{ + copy->bufferOffset = footprint->Offset; + if (src_box) + { + VkDeviceSize row_count = footprint->Footprint.Height / format->block_height; + copy->bufferOffset += vkd3d_format_get_data_offset(format, 
footprint->Footprint.RowPitch, + row_count * footprint->Footprint.RowPitch, src_box->left, src_box->top, src_box->front); + } + copy->bufferRowLength = footprint->Footprint.RowPitch / + (format->byte_count * format->block_byte_count) * format->block_width; + copy->bufferImageHeight = footprint->Footprint.Height; + vk_image_subresource_layers_from_d3d12(©->imageSubresource, + format, sub_resource_idx, image_desc->MipLevels); + copy->imageOffset.x = dst_x; + copy->imageOffset.y = dst_y; + copy->imageOffset.z = dst_z; + + vk_extent_3d_from_d3d12_miplevel(©->imageExtent, image_desc, + copy->imageSubresource.mipLevel); + copy->imageExtent.width -= copy->imageOffset.x; + copy->imageExtent.height -= copy->imageOffset.y; + copy->imageExtent.depth -= copy->imageOffset.z; + + if (src_box) + { + copy->imageExtent.width = min(copy->imageExtent.width, src_box->right - src_box->left); + copy->imageExtent.height = min(copy->imageExtent.height, src_box->bottom - src_box->top); + copy->imageExtent.depth = min(copy->imageExtent.depth, src_box->back - src_box->front); + } + else + { + copy->imageExtent.width = min(copy->imageExtent.width, footprint->Footprint.Width); + copy->imageExtent.height = min(copy->imageExtent.height, footprint->Footprint.Height); + copy->imageExtent.depth = min(copy->imageExtent.depth, footprint->Footprint.Depth); + } +} + +static void vk_image_buffer_copy_from_d3d12(VkBufferImageCopy *copy, + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT *footprint, unsigned int sub_resource_idx, + const D3D12_RESOURCE_DESC *image_desc, const struct vkd3d_format *format, + const D3D12_BOX *src_box, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z) +{ + VkDeviceSize row_count = footprint->Footprint.Height / format->block_height; + + copy->bufferOffset = footprint->Offset + vkd3d_format_get_data_offset(format, + footprint->Footprint.RowPitch, row_count * footprint->Footprint.RowPitch, dst_x, dst_y, dst_z); + copy->bufferRowLength = footprint->Footprint.RowPitch / + 
(format->byte_count * format->block_byte_count) * format->block_width; + copy->bufferImageHeight = footprint->Footprint.Height; + vk_image_subresource_layers_from_d3d12(©->imageSubresource, + format, sub_resource_idx, image_desc->MipLevels); + copy->imageOffset.x = src_box ? src_box->left : 0; + copy->imageOffset.y = src_box ? src_box->top : 0; + copy->imageOffset.z = src_box ? src_box->front : 0; + if (src_box) + { + copy->imageExtent.width = src_box->right - src_box->left; + copy->imageExtent.height = src_box->bottom - src_box->top; + copy->imageExtent.depth = src_box->back - src_box->front; + } + else + { + unsigned int miplevel = copy->imageSubresource.mipLevel; + vk_extent_3d_from_d3d12_miplevel(©->imageExtent, image_desc, miplevel); + } +} + +static void vk_image_copy_from_d3d12(VkImageCopy *image_copy, + unsigned int src_sub_resource_idx, unsigned int dst_sub_resource_idx, + const D3D12_RESOURCE_DESC *src_desc, const D3D12_RESOURCE_DESC *dst_desc, + const struct vkd3d_format *src_format, const struct vkd3d_format *dst_format, + const D3D12_BOX *src_box, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z) +{ + vk_image_subresource_layers_from_d3d12(&image_copy->srcSubresource, + src_format, src_sub_resource_idx, src_desc->MipLevels); + image_copy->srcOffset.x = src_box ? src_box->left : 0; + image_copy->srcOffset.y = src_box ? src_box->top : 0; + image_copy->srcOffset.z = src_box ? 
src_box->front : 0; + vk_image_subresource_layers_from_d3d12(&image_copy->dstSubresource, + dst_format, dst_sub_resource_idx, dst_desc->MipLevels); + image_copy->dstOffset.x = dst_x; + image_copy->dstOffset.y = dst_y; + image_copy->dstOffset.z = dst_z; + if (src_box) + { + image_copy->extent.width = src_box->right - src_box->left; + image_copy->extent.height = src_box->bottom - src_box->top; + image_copy->extent.depth = src_box->back - src_box->front; + } + else + { + unsigned int miplevel = image_copy->srcSubresource.mipLevel; + vk_extent_3d_from_d3d12_miplevel(&image_copy->extent, src_desc, miplevel); + } +} + +static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_list *list, + VkDeviceSize size, struct vkd3d_buffer *buffer) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + struct d3d12_device *device = list->device; + D3D12_HEAP_PROPERTIES heap_properties; + D3D12_RESOURCE_DESC buffer_desc; + HRESULT hr; + + memset(&heap_properties, 0, sizeof(heap_properties)); + heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; + + buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + buffer_desc.Alignment = 0; + buffer_desc.Width = size; + buffer_desc.Height = 1; + buffer_desc.DepthOrArraySize = 1; + buffer_desc.MipLevels = 1; + buffer_desc.Format = DXGI_FORMAT_UNKNOWN; + buffer_desc.SampleDesc.Count = 1; + buffer_desc.SampleDesc.Quality = 0; + buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + buffer_desc.Flags = D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + + if (FAILED(hr = vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_NONE, + &buffer_desc, &buffer->vk_buffer))) + return hr; + if (FAILED(hr = vkd3d_allocate_buffer_memory(device, buffer->vk_buffer, + &heap_properties, D3D12_HEAP_FLAG_NONE, &buffer->vk_memory, NULL, NULL))) + { + VK_CALL(vkDestroyBuffer(device->vk_device, buffer->vk_buffer, NULL)); + return hr; + } + + if (!d3d12_command_allocator_add_transfer_buffer(list->allocator, buffer)) + { + 
ERR("Failed to add transfer buffer.\n"); + vkd3d_buffer_destroy(buffer, device); + return E_OUTOFMEMORY; + } + + return S_OK; +} + +/* In Vulkan, each depth/stencil format is only compatible with itself. + * This means that we are not allowed to copy texture regions directly between + * depth/stencil and color formats. + * + * FIXME: Implement color <-> depth/stencil blits in shaders. + */ +static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, + struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, + const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, + unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; + const D3D12_RESOURCE_DESC *src_desc = &src_resource->desc; + unsigned int dst_miplevel_idx, src_miplevel_idx; + struct vkd3d_buffer transfer_buffer; + VkBufferImageCopy buffer_image_copy; + VkBufferMemoryBarrier vk_barrier; + VkDeviceSize buffer_size; + HRESULT hr; + + WARN("Copying incompatible texture formats %#x, %#x -> %#x, %#x.\n", + src_format->dxgi_format, src_format->vk_format, + dst_format->dxgi_format, dst_format->vk_format); + + assert(d3d12_resource_is_texture(dst_resource)); + assert(d3d12_resource_is_texture(src_resource)); + assert(!vkd3d_format_is_compressed(dst_format)); + assert(!vkd3d_format_is_compressed(src_format)); + assert(dst_format->byte_count == src_format->byte_count); + + buffer_image_copy.bufferOffset = 0; + buffer_image_copy.bufferRowLength = 0; + buffer_image_copy.bufferImageHeight = 0; + vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, + src_format, src_sub_resource_idx, src_desc->MipLevels); + src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; + buffer_image_copy.imageOffset.x = 0; + buffer_image_copy.imageOffset.y = 0; + 
buffer_image_copy.imageOffset.z = 0; + vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx); + + buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width * + buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; + if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) + { + ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); + return; + } + + VK_CALL(vkCmdCopyImageToBuffer(list->vk_command_buffer, + src_resource->u.vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + transfer_buffer.vk_buffer, 1, &buffer_image_copy)); + + vk_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + vk_barrier.pNext = NULL; + vk_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + vk_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vk_barrier.buffer = transfer_buffer.vk_buffer; + vk_barrier.offset = 0; + vk_barrier.size = VK_WHOLE_SIZE; + VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + 0, NULL, 1, &vk_barrier, 0, NULL)); + + vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, + dst_format, dst_sub_resource_idx, dst_desc->MipLevels); + dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; + + assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == + d3d12_resource_desc_get_width(dst_desc, dst_miplevel_idx)); + assert(d3d12_resource_desc_get_height(src_desc, src_miplevel_idx) == + d3d12_resource_desc_get_height(dst_desc, dst_miplevel_idx)); + assert(d3d12_resource_desc_get_depth(src_desc, src_miplevel_idx) == + d3d12_resource_desc_get_depth(dst_desc, dst_miplevel_idx)); + + VK_CALL(vkCmdCopyBufferToImage(list->vk_command_buffer, + transfer_buffer.vk_buffer, dst_resource->u.vk_image, + 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy)); +} + +static bool validate_d3d12_box(const D3D12_BOX *box) +{ + return box->right > box->left + && box->bottom > box->top + && box->back > box->front; +} + +static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList2 *iface, + const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, + const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_format *src_format, *dst_format; + const struct vkd3d_vk_device_procs *vk_procs; + VkBufferImageCopy buffer_image_copy; + VkImageCopy image_copy; + + TRACE("iface %p, dst %p, dst_x %u, dst_y %u, dst_z %u, src %p, src_box %p.\n", + iface, dst, dst_x, dst_y, dst_z, src, src_box); + + if (src_box && !validate_d3d12_box(src_box)) + { + WARN("Empty box %s.\n", debug_d3d12_box(src_box)); + return; + } + + vk_procs = &list->device->vk_procs; + + dst_resource = unsafe_impl_from_ID3D12Resource(dst->pResource); + src_resource = unsafe_impl_from_ID3D12Resource(src->pResource); + + d3d12_command_list_track_resource_usage(list, dst_resource); + d3d12_command_list_track_resource_usage(list, src_resource); + + d3d12_command_list_end_current_render_pass(list); + + if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX + && dst->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT) + { + assert(d3d12_resource_is_buffer(dst_resource)); + assert(d3d12_resource_is_texture(src_resource)); + + if (!(dst_format = vkd3d_format_from_d3d12_resource_desc(list->device, + &src_resource->desc, dst->u.PlacedFootprint.Footprint.Format))) + { + WARN("Invalid format %#x.\n", dst->u.PlacedFootprint.Footprint.Format); + return; + } + + if (dst_format->is_emulated) + { + FIXME("Format %#x is not supported yet.\n", dst_format->dxgi_format); + return; + } + + if 
((dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + && (dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) + FIXME("Depth-stencil format %#x not fully supported yet.\n", dst_format->dxgi_format); + + vk_image_buffer_copy_from_d3d12(&buffer_image_copy, &dst->u.PlacedFootprint, + src->u.SubresourceIndex, &src_resource->desc, dst_format, src_box, dst_x, dst_y, dst_z); + VK_CALL(vkCmdCopyImageToBuffer(list->vk_command_buffer, + src_resource->u.vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + dst_resource->u.vk_buffer, 1, &buffer_image_copy)); + } + else if (src->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT + && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) + { + assert(d3d12_resource_is_texture(dst_resource)); + assert(d3d12_resource_is_buffer(src_resource)); + + if (!(src_format = vkd3d_format_from_d3d12_resource_desc(list->device, + &dst_resource->desc, src->u.PlacedFootprint.Footprint.Format))) + { + WARN("Invalid format %#x.\n", src->u.PlacedFootprint.Footprint.Format); + return; + } + + if (src_format->is_emulated) + { + FIXME("Format %#x is not supported yet.\n", src_format->dxgi_format); + return; + } + + if ((src_format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + && (src_format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) + FIXME("Depth-stencil format %#x not fully supported yet.\n", src_format->dxgi_format); + + vk_buffer_image_copy_from_d3d12(&buffer_image_copy, &src->u.PlacedFootprint, + dst->u.SubresourceIndex, &dst_resource->desc, src_format, src_box, dst_x, dst_y, dst_z); + VK_CALL(vkCmdCopyBufferToImage(list->vk_command_buffer, + src_resource->u.vk_buffer, dst_resource->u.vk_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy)); + } + else if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX + && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) + { + assert(d3d12_resource_is_texture(dst_resource)); + assert(d3d12_resource_is_texture(src_resource)); + + dst_format = dst_resource->format; + 
src_format = src_resource->format; + + if ((dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + && (dst_format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) + FIXME("Depth-stencil format %#x not fully supported yet.\n", dst_format->dxgi_format); + if ((src_format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + && (src_format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) + FIXME("Depth-stencil format %#x not fully supported yet.\n", src_format->dxgi_format); + + if (dst_format->vk_aspect_mask != src_format->vk_aspect_mask) + { + d3d12_command_list_copy_incompatible_texture_region(list, + dst_resource, dst->u.SubresourceIndex, dst_format, + src_resource, src->u.SubresourceIndex, src_format); + return; + } + + vk_image_copy_from_d3d12(&image_copy, src->u.SubresourceIndex, dst->u.SubresourceIndex, + &src_resource->desc, &dst_resource->desc, src_format, dst_format, + src_box, dst_x, dst_y, dst_z); + VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_resource->u.vk_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy)); + } + else + { + FIXME("Copy type %#x -> %#x not implemented.\n", src->Type, dst->Type); + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList2 *iface, + ID3D12Resource *dst, ID3D12Resource *src) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_vk_device_procs *vk_procs; + VkBufferCopy vk_buffer_copy; + VkImageCopy vk_image_copy; + unsigned int layer_count; + unsigned int i; + + TRACE("iface %p, dst_resource %p, src_resource %p.\n", iface, dst, src); + + vk_procs = &list->device->vk_procs; + + dst_resource = unsafe_impl_from_ID3D12Resource(dst); + src_resource = unsafe_impl_from_ID3D12Resource(src); + + d3d12_command_list_track_resource_usage(list, dst_resource); + d3d12_command_list_track_resource_usage(list, 
src_resource); + + d3d12_command_list_end_current_render_pass(list); + + if (d3d12_resource_is_buffer(dst_resource)) + { + assert(d3d12_resource_is_buffer(src_resource)); + assert(src_resource->desc.Width == dst_resource->desc.Width); + + vk_buffer_copy.srcOffset = 0; + vk_buffer_copy.dstOffset = 0; + vk_buffer_copy.size = dst_resource->desc.Width; + VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer, + src_resource->u.vk_buffer, dst_resource->u.vk_buffer, 1, &vk_buffer_copy)); + } + else + { + layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); + + assert(d3d12_resource_is_texture(dst_resource)); + assert(d3d12_resource_is_texture(src_resource)); + assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); + assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); + + for (i = 0; i < dst_resource->desc.MipLevels; ++i) + { + vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, + src_resource->format, dst_resource->format, NULL, 0, 0, 0); + vk_image_copy.dstSubresource.layerCount = layer_count; + vk_image_copy.srcSubresource.layerCount = layer_count; + VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_resource->u.vk_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_copy)); + } + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList2 *iface, + ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, + const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, + D3D12_TILE_COPY_FLAGS flags) +{ + FIXME("iface %p, tiled_resource %p, tile_region_start_coordinate %p, tile_region_size %p, " + "buffer %p, buffer_offset %#"PRIx64", flags %#x stub!\n", + iface, tiled_resource, tile_region_start_coordinate, tile_region_size, + buffer, buffer_offset, flags); +} + +static void STDMETHODCALLTYPE 
d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList2 *iface, + ID3D12Resource *dst, UINT dst_sub_resource_idx, + ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_format *src_format, *dst_format, *vk_format; + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_vk_device_procs *vk_procs; + const struct d3d12_device *device; + VkImageResolve vk_image_resolve; + + TRACE("iface %p, dst_resource %p, dst_sub_resource_idx %u, src_resource %p, src_sub_resource_idx %u, " + "format %#x.\n", iface, dst, dst_sub_resource_idx, src, src_sub_resource_idx, format); + + device = list->device; + vk_procs = &device->vk_procs; + + dst_resource = unsafe_impl_from_ID3D12Resource(dst); + src_resource = unsafe_impl_from_ID3D12Resource(src); + + assert(d3d12_resource_is_texture(dst_resource)); + assert(d3d12_resource_is_texture(src_resource)); + + d3d12_command_list_track_resource_usage(list, dst_resource); + d3d12_command_list_track_resource_usage(list, src_resource); + + d3d12_command_list_end_current_render_pass(list); + + dst_format = dst_resource->format; + src_format = src_resource->format; + + if (dst_format->type == VKD3D_FORMAT_TYPE_TYPELESS || src_format->type == VKD3D_FORMAT_TYPE_TYPELESS) + { + if (!(vk_format = vkd3d_format_from_d3d12_resource_desc(device, &dst_resource->desc, format))) + { + WARN("Invalid format %#x.\n", format); + return; + } + if (dst_format->vk_format != src_format->vk_format || dst_format->vk_format != vk_format->vk_format) + { + FIXME("Not implemented for typeless resources.\n"); + return; + } + } + + /* Resolve of depth/stencil images is not supported in Vulkan. 
*/ + if ((dst_format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) + || (src_format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) + { + FIXME("Resolve of depth/stencil images is not implemented yet.\n"); + return; + } + + vk_image_subresource_layers_from_d3d12(&vk_image_resolve.srcSubresource, + src_format, src_sub_resource_idx, src_resource->desc.MipLevels); + memset(&vk_image_resolve.srcOffset, 0, sizeof(vk_image_resolve.srcOffset)); + vk_image_subresource_layers_from_d3d12(&vk_image_resolve.dstSubresource, + dst_format, dst_sub_resource_idx, dst_resource->desc.MipLevels); + memset(&vk_image_resolve.dstOffset, 0, sizeof(vk_image_resolve.dstOffset)); + vk_extent_3d_from_d3d12_miplevel(&vk_image_resolve.extent, + &dst_resource->desc, vk_image_resolve.dstSubresource.mipLevel); + + VK_CALL(vkCmdResolveImage(list->vk_command_buffer, src_resource->u.vk_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_resource->u.vk_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList2 *iface, + D3D12_PRIMITIVE_TOPOLOGY topology) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, topology %#x.\n", iface, topology); + + if (list->primitive_topology == topology) + return; + + list->primitive_topology = topology; + d3d12_command_list_invalidate_current_pipeline(list); +} + +static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList2 *iface, + UINT viewport_count, const D3D12_VIEWPORT *viewports) +{ + VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; + + TRACE("iface %p, viewport_count %u, viewports %p.\n", iface, viewport_count, viewports); + + 
if (viewport_count > ARRAY_SIZE(vk_viewports)) + { + FIXME("Viewport count %u > D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE.\n", viewport_count); + viewport_count = ARRAY_SIZE(vk_viewports); + } + + for (i = 0; i < viewport_count; ++i) + { + vk_viewports[i].x = viewports[i].TopLeftX; + vk_viewports[i].y = viewports[i].TopLeftY + viewports[i].Height; + vk_viewports[i].width = viewports[i].Width; + vk_viewports[i].height = -viewports[i].Height; + vk_viewports[i].minDepth = viewports[i].MinDepth; + vk_viewports[i].maxDepth = viewports[i].MaxDepth; + + if (!vk_viewports[i].width || !vk_viewports[i].height) + { + FIXME_ONCE("Invalid viewport %u, ignoring RSSetViewports().\n", i); + return; + } + } + + vk_procs = &list->device->vk_procs; + VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList2 *iface, + UINT rect_count, const D3D12_RECT *rects) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; + + TRACE("iface %p, rect_count %u, rects %p.\n", iface, rect_count, rects); + + if (rect_count > ARRAY_SIZE(vk_rects)) + { + FIXME("Rect count %u > D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE.\n", rect_count); + rect_count = ARRAY_SIZE(vk_rects); + } + + for (i = 0; i < rect_count; ++i) + { + vk_rects[i].offset.x = rects[i].left; + vk_rects[i].offset.y = rects[i].top; + vk_rects[i].extent.width = rects[i].right - rects[i].left; + vk_rects[i].extent.height = rects[i].bottom - rects[i].top; + } + + vk_procs = &list->device->vk_procs; + VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList2 *iface, + const FLOAT blend_factor[4]) +{ + 
struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); + + vk_procs = &list->device->vk_procs; + VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList2 *iface, + UINT stencil_ref) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); + + vk_procs = &list->device->vk_procs; + VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList2 *iface, + ID3D12PipelineState *pipeline_state) +{ + struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); + + if (list->state == state) + return; + + d3d12_command_list_invalidate_bindings(list, state); + d3d12_command_list_invalidate_current_pipeline(list); + + list->state = state; +} + +static bool is_ds_multiplanar_resolvable(unsigned int first_state, unsigned int second_state) +{ + /* Only combinations of depth/stencil read/write are supported. 
*/ + return first_state == second_state + || ((first_state & (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_DEPTH_WRITE)) + && (second_state & (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_DEPTH_WRITE))); +} + +static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BARRIER *barriers, + unsigned int i, unsigned int barrier_count, unsigned int sub_resource_count) +{ + unsigned int sub_resource_idx = barriers[i].u.Transition.Subresource; + unsigned int j; + + for (j = i + 1; j < barrier_count; ++j) + { + if (barriers[j].Type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION + && barriers[j].u.Transition.pResource == barriers[i].u.Transition.pResource + && sub_resource_idx % sub_resource_count == barriers[j].u.Transition.Subresource % sub_resource_count) + { + /* Second barrier must be for a different plane. */ + if (barriers[j].u.Transition.Subresource == sub_resource_idx) + return 0; + + /* Validate the second barrier and check if the combination of two states is supported. 
*/ + if (!is_valid_resource_state(barriers[j].u.Transition.StateBefore) + || !is_ds_multiplanar_resolvable(barriers[i].u.Transition.StateBefore, barriers[j].u.Transition.StateBefore) + || !is_valid_resource_state(barriers[j].u.Transition.StateAfter) + || !is_ds_multiplanar_resolvable(barriers[i].u.Transition.StateAfter, barriers[j].u.Transition.StateAfter) + || barriers[j].u.Transition.Subresource >= sub_resource_count * 2u) + return 0; + + return j; + } + } + return 0; +} + +static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList2 *iface, + UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + bool have_aliasing_barriers = false, have_split_barriers = false; + const struct vkd3d_vk_device_procs *vk_procs; + const struct vkd3d_vulkan_info *vk_info; + bool *multiplanar_handled = NULL; + unsigned int i; + + TRACE("iface %p, barrier_count %u, barriers %p.\n", iface, barrier_count, barriers); + + vk_procs = &list->device->vk_procs; + vk_info = &list->device->vk_info; + + d3d12_command_list_end_current_render_pass(list); + + for (i = 0; i < barrier_count; ++i) + { + unsigned int sub_resource_idx = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + VkPipelineStageFlags src_stage_mask = 0, dst_stage_mask = 0; + VkAccessFlags src_access_mask = 0, dst_access_mask = 0; + const D3D12_RESOURCE_BARRIER *current = &barriers[i]; + VkImageLayout layout_before, layout_after; + struct d3d12_resource *resource; + + have_split_barriers = have_split_barriers + || (current->Flags & D3D12_RESOURCE_BARRIER_FLAG_BEGIN_ONLY) + || (current->Flags & D3D12_RESOURCE_BARRIER_FLAG_END_ONLY); + + if (current->Flags & D3D12_RESOURCE_BARRIER_FLAG_BEGIN_ONLY) + continue; + + switch (current->Type) + { + case D3D12_RESOURCE_BARRIER_TYPE_TRANSITION: + { + unsigned int state_before, state_after, stencil_state_before = 0, stencil_state_after = 0; + const D3D12_RESOURCE_TRANSITION_BARRIER 
*transition = &current->u.Transition; + + if (!is_valid_resource_state(transition->StateBefore)) + { + d3d12_command_list_mark_as_invalid(list, + "Invalid StateBefore %#x (barrier %u).", transition->StateBefore, i); + continue; + } + if (!is_valid_resource_state(transition->StateAfter)) + { + d3d12_command_list_mark_as_invalid(list, + "Invalid StateAfter %#x (barrier %u).", transition->StateAfter, i); + continue; + } + + if (!(resource = unsafe_impl_from_ID3D12Resource(transition->pResource))) + { + d3d12_command_list_mark_as_invalid(list, "A resource pointer is NULL."); + continue; + } + + if (multiplanar_handled && multiplanar_handled[i]) + continue; + + state_before = transition->StateBefore; + state_after = transition->StateAfter; + + sub_resource_idx = transition->Subresource; + + if (sub_resource_idx != D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES + && (resource->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + { + unsigned int sub_resource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); + unsigned int j = d3d12_find_ds_multiplanar_transition(barriers, i, barrier_count, sub_resource_count); + if (j && (multiplanar_handled || (multiplanar_handled = vkd3d_calloc(barrier_count, sizeof(*multiplanar_handled))))) + { + multiplanar_handled[j] = true; + if (sub_resource_idx >= sub_resource_count) + { + sub_resource_idx -= sub_resource_count; + /* The stencil barrier is at i, depth at j. */ + state_before = barriers[j].u.Transition.StateBefore; + state_after = barriers[j].u.Transition.StateAfter; + stencil_state_before = transition->StateBefore; + stencil_state_after = transition->StateAfter; + } + else + { + /* Depth at i, stencil at j. 
*/ + stencil_state_before = barriers[j].u.Transition.StateBefore; + stencil_state_after = barriers[j].u.Transition.StateAfter; + } + } + else if (sub_resource_idx >= sub_resource_count) + { + FIXME_ONCE("Unhandled sub-resource idx %u.\n", sub_resource_idx); + continue; + } + } + + if (!vk_barrier_parameters_from_d3d12_resource_state(state_before, stencil_state_before, + resource, list->vk_queue_flags, vk_info, &src_access_mask, &src_stage_mask, &layout_before)) + { + FIXME("Unhandled state %#x.\n", state_before); + continue; + } + if (!vk_barrier_parameters_from_d3d12_resource_state(state_after, stencil_state_after, + resource, list->vk_queue_flags, vk_info, &dst_access_mask, &dst_stage_mask, &layout_after)) + { + FIXME("Unhandled state %#x.\n", state_after); + continue; + } + + TRACE("Transition barrier (resource %p, subresource %#x, before %#x, after %#x).\n", + resource, transition->Subresource, transition->StateBefore, transition->StateAfter); + break; + } + + case D3D12_RESOURCE_BARRIER_TYPE_UAV: + { + const D3D12_RESOURCE_UAV_BARRIER *uav = &current->u.UAV; + VkPipelineStageFlags stage_mask; + VkImageLayout image_layout; + VkAccessFlags access_mask; + + resource = unsafe_impl_from_ID3D12Resource(uav->pResource); + vk_barrier_parameters_from_d3d12_resource_state(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0, + resource, list->vk_queue_flags, vk_info, &access_mask, &stage_mask, &image_layout); + src_access_mask = dst_access_mask = access_mask; + src_stage_mask = dst_stage_mask = stage_mask; + layout_before = layout_after = image_layout; + + TRACE("UAV barrier (resource %p).\n", resource); + break; + } + + case D3D12_RESOURCE_BARRIER_TYPE_ALIASING: + have_aliasing_barriers = true; + continue; + default: + WARN("Invalid barrier type %#x.\n", current->Type); + continue; + } + + if (resource) + d3d12_command_list_track_resource_usage(list, resource); + + if (!resource) + { + VkMemoryBarrier vk_barrier; + + vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + 
vk_barrier.pNext = NULL; + vk_barrier.srcAccessMask = src_access_mask; + vk_barrier.dstAccessMask = dst_access_mask; + + VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, src_stage_mask, dst_stage_mask, 0, + 1, &vk_barrier, 0, NULL, 0, NULL)); + } + else if (d3d12_resource_is_buffer(resource)) + { + VkBufferMemoryBarrier vk_barrier; + + vk_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + vk_barrier.pNext = NULL; + vk_barrier.srcAccessMask = src_access_mask; + vk_barrier.dstAccessMask = dst_access_mask; + vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vk_barrier.buffer = resource->u.vk_buffer; + vk_barrier.offset = 0; + vk_barrier.size = VK_WHOLE_SIZE; + + VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, src_stage_mask, dst_stage_mask, 0, + 0, NULL, 1, &vk_barrier, 0, NULL)); + } + else + { + VkImageMemoryBarrier vk_barrier; + + vk_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + vk_barrier.pNext = NULL; + vk_barrier.srcAccessMask = src_access_mask; + vk_barrier.dstAccessMask = dst_access_mask; + vk_barrier.oldLayout = layout_before; + vk_barrier.newLayout = layout_after; + vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vk_barrier.image = resource->u.vk_image; + + vk_barrier.subresourceRange.aspectMask = resource->format->vk_aspect_mask; + if (sub_resource_idx == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES) + { + vk_barrier.subresourceRange.baseMipLevel = 0; + vk_barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + vk_barrier.subresourceRange.baseArrayLayer = 0; + vk_barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + } + else + { + /* FIXME: Some formats in D3D12 are planar. Each plane is a separate sub-resource. 
*/ + if (sub_resource_idx >= d3d12_resource_desc_get_sub_resource_count(&resource->desc)) + { + FIXME_ONCE("Unhandled sub-resource idx %u.\n", sub_resource_idx); + continue; + } + + vk_barrier.subresourceRange.baseMipLevel = sub_resource_idx % resource->desc.MipLevels; + vk_barrier.subresourceRange.levelCount = 1; + vk_barrier.subresourceRange.baseArrayLayer = sub_resource_idx / resource->desc.MipLevels; + vk_barrier.subresourceRange.layerCount = 1; + } + + VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, src_stage_mask, dst_stage_mask, 0, + 0, NULL, 0, NULL, 1, &vk_barrier)); + } + } + + vkd3d_free(multiplanar_handled); + + if (have_aliasing_barriers) + FIXME_ONCE("Aliasing barriers not implemented yet.\n"); + + /* Vulkan doesn't support split barriers. */ + if (have_split_barriers) + WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); +} + +static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList2 *iface, + ID3D12GraphicsCommandList *command_list) +{ + FIXME("iface %p, command_list %p stub!\n", iface, command_list); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList2 *iface, + UINT heap_count, ID3D12DescriptorHeap *const *heaps) +{ + TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); + + /* Our current implementation does not need this method. + * + * It could be used to validate descriptor tables but we do not have an + * equivalent of the D3D12 Debug Layer. 
*/ +} + +static void d3d12_command_list_set_root_signature(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, const struct d3d12_root_signature *root_signature) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + + if (bindings->root_signature == root_signature) + return; + + bindings->root_signature = root_signature; + + d3d12_command_list_invalidate_root_parameters(list, bind_point); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList2 *iface, + ID3D12RootSignature *root_signature) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_signature %p.\n", iface, root_signature); + + d3d12_command_list_set_root_signature(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE, + unsafe_impl_from_ID3D12RootSignature(root_signature)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList2 *iface, + ID3D12RootSignature *root_signature) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_signature %p.\n", iface, root_signature); + + d3d12_command_list_set_root_signature(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS, + unsafe_impl_from_ID3D12RootSignature(root_signature)); +} + +static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, unsigned int index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct d3d12_root_signature *root_signature = bindings->root_signature; + struct d3d12_desc *desc; + + assert(root_signature_get_descriptor_table(root_signature, index)); + + assert(index < ARRAY_SIZE(bindings->descriptor_tables)); + desc = d3d12_desc_from_gpu_handle(base_descriptor); + + if (bindings->descriptor_tables[index] == desc) + return; + + if (desc 
&& !vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&list->device->gpu_descriptor_allocator, + desc)) + { + /* Failure to find a heap means the descriptor handle is from + * the wrong heap type or not a handle at all. */ + ERR("Invalid heap for base descriptor %"PRIx64".\n", base_descriptor.ptr); + /* TODO: Mark list as invalid? */ + return; + } + + bindings->descriptor_tables[index] = desc; + bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; + bindings->descriptor_table_active_mask |= (uint64_t)1 << index; +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, + UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", + iface, root_parameter_index, base_descriptor.ptr); + + d3d12_command_list_set_descriptor_table(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE, + root_parameter_index, base_descriptor); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, + UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", + iface, root_parameter_index, base_descriptor.ptr); + + d3d12_command_list_set_descriptor_table(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS, + root_parameter_index, base_descriptor); +} + +static void d3d12_command_list_set_root_constants(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, unsigned int index, unsigned int offset, + unsigned int count, const void *data) +{ + const struct d3d12_root_signature *root_signature = list->pipeline_bindings[bind_point].root_signature; + const struct vkd3d_vk_device_procs *vk_procs = 
&list->device->vk_procs; + const struct d3d12_root_constant *c; + + c = root_signature_get_32bit_constants(root_signature, index); + VK_CALL(vkCmdPushConstants(list->vk_command_buffer, root_signature->vk_pipeline_layout, + c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, + UINT root_parameter_index, UINT data, UINT dst_offset) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", + iface, root_parameter_index, data, dst_offset); + + d3d12_command_list_set_root_constants(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE, + root_parameter_index, dst_offset, 1, &data); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, + UINT root_parameter_index, UINT data, UINT dst_offset) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", + iface, root_parameter_index, data, dst_offset); + + d3d12_command_list_set_root_constants(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS, + root_parameter_index, dst_offset, 1, &data); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, + UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", + iface, root_parameter_index, constant_count, data, dst_offset); + + d3d12_command_list_set_root_constants(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE, + root_parameter_index, dst_offset, constant_count, data); +} + +static void STDMETHODCALLTYPE 
d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, + UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", + iface, root_parameter_index, constant_count, data, dst_offset); + + d3d12_command_list_set_root_constants(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS, + root_parameter_index, dst_offset, constant_count, data); +} + +static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, unsigned int index, D3D12_GPU_VIRTUAL_ADDRESS gpu_address) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct d3d12_root_signature *root_signature = bindings->root_signature; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; + const struct d3d12_root_parameter *root_parameter; + struct VkWriteDescriptorSet descriptor_write; + struct VkDescriptorBufferInfo buffer_info; + struct d3d12_resource *resource; + + root_parameter = root_signature_get_root_descriptor(root_signature, index); + assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); + + resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); + buffer_info.buffer = resource->u.vk_buffer; + buffer_info.offset = gpu_address - resource->gpu_address; + buffer_info.range = resource->desc.Width - buffer_info.offset; + buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); + + if (vk_info->KHR_push_descriptor) + { + vk_write_descriptor_set_from_root_descriptor(&descriptor_write, + root_parameter, VK_NULL_HANDLE, NULL, &buffer_info); + VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, + 
root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); + } + else + { + d3d12_command_list_prepare_descriptors(list, bind_point); + vk_write_descriptor_set_from_root_descriptor(&descriptor_write, + root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info); + VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); + + assert(index < ARRAY_SIZE(bindings->push_descriptors)); + bindings->push_descriptors[index].u.cbv.vk_buffer = buffer_info.buffer; + bindings->push_descriptors[index].u.cbv.offset = buffer_info.offset; + bindings->push_descriptor_dirty_mask |= 1u << index; + bindings->push_descriptor_active_mask |= 1u << index; + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( + ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); + + d3d12_command_list_set_root_cbv(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE, root_parameter_index, address); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( + ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); + + d3d12_command_list_set_root_cbv(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS, root_parameter_index, address); +} + +static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, unsigned int index, D3D12_GPU_VIRTUAL_ADDRESS gpu_address) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct d3d12_root_signature 
*root_signature = bindings->root_signature; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; + const struct d3d12_root_parameter *root_parameter; + struct VkWriteDescriptorSet descriptor_write; + VkDevice vk_device = list->device->vk_device; + VkBufferView vk_buffer_view; + + root_parameter = root_signature_get_root_descriptor(root_signature, index); + assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); + + /* FIXME: Re-use buffer views. */ + if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, &vk_buffer_view)) + { + ERR("Failed to create buffer view.\n"); + return; + } + + if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) + { + ERR("Failed to add buffer view.\n"); + VK_CALL(vkDestroyBufferView(vk_device, vk_buffer_view, NULL)); + return; + } + + if (vk_info->KHR_push_descriptor) + { + vk_write_descriptor_set_from_root_descriptor(&descriptor_write, + root_parameter, VK_NULL_HANDLE, &vk_buffer_view, NULL); + VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, + root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); + } + else + { + d3d12_command_list_prepare_descriptors(list, bind_point); + vk_write_descriptor_set_from_root_descriptor(&descriptor_write, + root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL); + VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); + + assert(index < ARRAY_SIZE(bindings->push_descriptors)); + bindings->push_descriptors[index].u.vk_buffer_view = vk_buffer_view; + bindings->push_descriptor_dirty_mask |= 1u << index; + bindings->push_descriptor_active_mask |= 1u << index; + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( + ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) +{ + struct d3d12_command_list *list 
= impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); + + d3d12_command_list_set_root_descriptor(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE, + root_parameter_index, address); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( + ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); + + d3d12_command_list_set_root_descriptor(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS, + root_parameter_index, address); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( + ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); + + d3d12_command_list_set_root_descriptor(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE, + root_parameter_index, address); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( + ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); + + d3d12_command_list_set_root_descriptor(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS, + root_parameter_index, address); +} + +static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList2 *iface, + const D3D12_INDEX_BUFFER_VIEW *view) +{ + struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_resource *resource; + enum VkIndexType index_type; + + TRACE("iface %p, view %p.\n", iface, view); + + if (!view) + { + WARN("Ignoring NULL index buffer view.\n"); + return; + } + + vk_procs = &list->device->vk_procs; + + switch (view->Format) + { + case DXGI_FORMAT_R16_UINT: + index_type = VK_INDEX_TYPE_UINT16; + break; + case DXGI_FORMAT_R32_UINT: + index_type = VK_INDEX_TYPE_UINT32; + break; + default: + WARN("Invalid index format %#x.\n", view->Format); + return; + } + + list->index_buffer_format = view->Format; + + resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, view->BufferLocation); + VK_CALL(vkCmdBindIndexBuffer(list->vk_command_buffer, resource->u.vk_buffer, + view->BufferLocation - resource->gpu_address, index_type)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList2 *iface, + UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct vkd3d_null_resources *null_resources; + struct vkd3d_gpu_va_allocator *gpu_va_allocator; + VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; + const struct vkd3d_vk_device_procs *vk_procs; + VkBuffer buffers[ARRAY_SIZE(list->strides)]; + struct d3d12_resource *resource; + bool invalidate = false; + unsigned int i, stride; + + TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views); + + vk_procs = &list->device->vk_procs; + null_resources = &list->device->null_resources; + gpu_va_allocator = &list->device->gpu_va_allocator; + + if (!vkd3d_bound_range(start_slot, view_count, ARRAY_SIZE(list->strides))) + { + WARN("Invalid start slot %u / view count %u.\n", start_slot, view_count); + return; + } + + if (!views) + { + WARN("NULL \"views\" pointer specified.\n"); + return; + } + + for (i = 
0; i < view_count; ++i) + { + if (views[i].BufferLocation) + { + resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation); + buffers[i] = resource->u.vk_buffer; + offsets[i] = views[i].BufferLocation - resource->gpu_address; + stride = views[i].StrideInBytes; + } + else + { + buffers[i] = null_resources->vk_buffer; + offsets[i] = 0; + stride = 0; + } + + invalidate |= list->strides[start_slot + i] != stride; + list->strides[start_slot + i] = stride; + } + + if (view_count) + VK_CALL(vkCmdBindVertexBuffers(list->vk_command_buffer, start_slot, view_count, buffers, offsets)); + + if (invalidate) + d3d12_command_list_invalidate_current_pipeline(list); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList2 *iface, + UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; + VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; + VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; + struct vkd3d_gpu_va_allocator *gpu_va_allocator; + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_resource *resource; + unsigned int i, first, count; + + TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views); + + d3d12_command_list_end_current_render_pass(list); + + if (!list->device->vk_info.EXT_transform_feedback) + { + FIXME("Transform feedback is not supported by Vulkan implementation.\n"); + return; + } + + if (!vkd3d_bound_range(start_slot, view_count, ARRAY_SIZE(buffers))) + { + WARN("Invalid start slot %u / view count %u.\n", start_slot, view_count); + return; + } + + vk_procs = &list->device->vk_procs; + gpu_va_allocator = &list->device->gpu_va_allocator; + + count = 0; + first = start_slot; + for (i = 0; i < view_count; ++i) + { + if (views[i].BufferLocation && 
views[i].SizeInBytes) + { + resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation); + buffers[count] = resource->u.vk_buffer; + offsets[count] = views[i].BufferLocation - resource->gpu_address; + sizes[count] = views[i].SizeInBytes; + + resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferFilledSizeLocation); + list->so_counter_buffers[start_slot + i] = resource->u.vk_buffer; + list->so_counter_buffer_offsets[start_slot + i] = views[i].BufferFilledSizeLocation - resource->gpu_address; + ++count; + } + else + { + if (count) + VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); + count = 0; + first = start_slot + i + 1; + + list->so_counter_buffers[start_slot + i] = VK_NULL_HANDLE; + list->so_counter_buffer_offsets[start_slot + i] = 0; + + WARN("Trying to unbind transform feedback buffer %u. Ignoring.\n", start_slot + i); + } + } + + if (count) + VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList2 *iface, + UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, + BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct d3d12_rtv_desc *rtv_desc; + const struct d3d12_dsv_desc *dsv_desc; + VkFormat prev_dsv_format; + struct vkd3d_view *view; + unsigned int i; + + TRACE("iface %p, render_target_descriptor_count %u, render_target_descriptors %p, " + "single_descriptor_handle %#x, depth_stencil_descriptor %p.\n", + iface, render_target_descriptor_count, render_target_descriptors, + single_descriptor_handle, depth_stencil_descriptor); + + if (render_target_descriptor_count > ARRAY_SIZE(list->rtvs)) + { + WARN("Descriptor 
count %u > %zu, ignoring extra descriptors.\n", + render_target_descriptor_count, ARRAY_SIZE(list->rtvs)); + render_target_descriptor_count = ARRAY_SIZE(list->rtvs); + } + + list->fb_width = 0; + list->fb_height = 0; + list->fb_layer_count = 0; + for (i = 0; i < render_target_descriptor_count; ++i) + { + if (single_descriptor_handle) + { + if ((rtv_desc = d3d12_rtv_desc_from_cpu_handle(*render_target_descriptors))) + rtv_desc += i; + } + else + { + rtv_desc = d3d12_rtv_desc_from_cpu_handle(render_target_descriptors[i]); + } + + if (!rtv_desc || !rtv_desc->resource) + { + WARN("RTV descriptor %u is not initialized.\n", i); + list->rtvs[i] = VK_NULL_HANDLE; + continue; + } + + d3d12_command_list_track_resource_usage(list, rtv_desc->resource); + + /* In D3D12 CPU descriptors are consumed when a command is recorded. */ + view = rtv_desc->view; + if (!d3d12_command_allocator_add_view(list->allocator, view)) + { + WARN("Failed to add view.\n"); + } + + list->rtvs[i] = view->u.vk_image_view; + list->fb_width = max(list->fb_width, rtv_desc->width); + list->fb_height = max(list->fb_height, rtv_desc->height); + list->fb_layer_count = max(list->fb_layer_count, rtv_desc->layer_count); + } + + prev_dsv_format = list->dsv_format; + list->dsv = VK_NULL_HANDLE; + list->dsv_format = VK_FORMAT_UNDEFINED; + if (depth_stencil_descriptor) + { + if ((dsv_desc = d3d12_dsv_desc_from_cpu_handle(*depth_stencil_descriptor)) + && dsv_desc->resource) + { + d3d12_command_list_track_resource_usage(list, dsv_desc->resource); + + /* In D3D12 CPU descriptors are consumed when a command is recorded. 
*/ + view = dsv_desc->view; + if (!d3d12_command_allocator_add_view(list->allocator, view)) + { + WARN("Failed to add view.\n"); + list->dsv = VK_NULL_HANDLE; + } + + list->dsv = view->u.vk_image_view; + list->fb_width = max(list->fb_width, dsv_desc->width); + list->fb_height = max(list->fb_height, dsv_desc->height); + list->fb_layer_count = max(list->fb_layer_count, dsv_desc->layer_count); + list->dsv_format = dsv_desc->format->vk_format; + } + else + { + WARN("DSV descriptor is not initialized.\n"); + } + } + + if (prev_dsv_format != list->dsv_format && d3d12_pipeline_state_has_unknown_dsv_format(list->state)) + d3d12_command_list_invalidate_current_pipeline(list); + + d3d12_command_list_invalidate_current_framebuffer(list); + d3d12_command_list_invalidate_current_render_pass(list); +} + +static void d3d12_command_list_clear(struct d3d12_command_list *list, + const struct VkAttachmentDescription *attachment_desc, + const struct VkAttachmentReference *color_reference, const struct VkAttachmentReference *ds_reference, + struct vkd3d_view *view, size_t width, size_t height, unsigned int layer_count, + const union VkClearValue *clear_value, unsigned int rect_count, const D3D12_RECT *rects) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + struct VkSubpassDescription sub_pass_desc; + struct VkRenderPassCreateInfo pass_desc; + struct VkRenderPassBeginInfo begin_desc; + struct VkFramebufferCreateInfo fb_desc; + VkFramebuffer vk_framebuffer; + VkRenderPass vk_render_pass; + D3D12_RECT full_rect; + unsigned int i; + VkResult vr; + + d3d12_command_list_end_current_render_pass(list); + + if (!rect_count) + { + full_rect.top = 0; + full_rect.left = 0; + full_rect.bottom = height; + full_rect.right = width; + + rect_count = 1; + rects = &full_rect; + } + + sub_pass_desc.flags = 0; + sub_pass_desc.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + sub_pass_desc.inputAttachmentCount = 0; + sub_pass_desc.pInputAttachments = NULL; + 
sub_pass_desc.colorAttachmentCount = !!color_reference; + sub_pass_desc.pColorAttachments = color_reference; + sub_pass_desc.pResolveAttachments = NULL; + sub_pass_desc.pDepthStencilAttachment = ds_reference; + sub_pass_desc.preserveAttachmentCount = 0; + sub_pass_desc.pPreserveAttachments = NULL; + + pass_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + pass_desc.pNext = NULL; + pass_desc.flags = 0; + pass_desc.attachmentCount = 1; + pass_desc.pAttachments = attachment_desc; + pass_desc.subpassCount = 1; + pass_desc.pSubpasses = &sub_pass_desc; + pass_desc.dependencyCount = 0; + pass_desc.pDependencies = NULL; + if ((vr = VK_CALL(vkCreateRenderPass(list->device->vk_device, &pass_desc, NULL, &vk_render_pass))) < 0) + { + WARN("Failed to create Vulkan render pass, vr %d.\n", vr); + return; + } + + if (!d3d12_command_allocator_add_render_pass(list->allocator, vk_render_pass)) + { + WARN("Failed to add render pass.\n"); + VK_CALL(vkDestroyRenderPass(list->device->vk_device, vk_render_pass, NULL)); + return; + } + + if (!d3d12_command_allocator_add_view(list->allocator, view)) + { + WARN("Failed to add view.\n"); + } + + fb_desc.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fb_desc.pNext = NULL; + fb_desc.flags = 0; + fb_desc.renderPass = vk_render_pass; + fb_desc.attachmentCount = 1; + fb_desc.pAttachments = &view->u.vk_image_view; + fb_desc.width = width; + fb_desc.height = height; + fb_desc.layers = layer_count; + if ((vr = VK_CALL(vkCreateFramebuffer(list->device->vk_device, &fb_desc, NULL, &vk_framebuffer))) < 0) + { + WARN("Failed to create Vulkan framebuffer, vr %d.\n", vr); + return; + } + + if (!d3d12_command_allocator_add_framebuffer(list->allocator, vk_framebuffer)) + { + WARN("Failed to add framebuffer.\n"); + VK_CALL(vkDestroyFramebuffer(list->device->vk_device, vk_framebuffer, NULL)); + return; + } + + begin_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + begin_desc.pNext = NULL; + begin_desc.renderPass = vk_render_pass; + 
begin_desc.framebuffer = vk_framebuffer; + begin_desc.clearValueCount = 1; + begin_desc.pClearValues = clear_value; + + for (i = 0; i < rect_count; ++i) + { + begin_desc.renderArea.offset.x = rects[i].left; + begin_desc.renderArea.offset.y = rects[i].top; + begin_desc.renderArea.extent.width = rects[i].right - rects[i].left; + begin_desc.renderArea.extent.height = rects[i].bottom - rects[i].top; + VK_CALL(vkCmdBeginRenderPass(list->vk_command_buffer, &begin_desc, VK_SUBPASS_CONTENTS_INLINE)); + VK_CALL(vkCmdEndRenderPass(list->vk_command_buffer)); + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList2 *iface, + D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, + UINT rect_count, const D3D12_RECT *rects) +{ + const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); + struct VkAttachmentDescription attachment_desc; + struct VkAttachmentReference ds_reference; + + TRACE("iface %p, dsv %#lx, flags %#x, depth %.8e, stencil 0x%02x, rect_count %u, rects %p.\n", + iface, dsv.ptr, flags, depth, stencil, rect_count, rects); + + d3d12_command_list_track_resource_usage(list, dsv_desc->resource); + + attachment_desc.flags = 0; + attachment_desc.format = dsv_desc->format->vk_format; + attachment_desc.samples = dsv_desc->sample_count; + if (flags & D3D12_CLEAR_FLAG_DEPTH) + { + attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + } + else + { + attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + if (flags & D3D12_CLEAR_FLAG_STENCIL) + { + attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + } + else + { + 
attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + attachment_desc.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachment_desc.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + ds_reference.attachment = 0; + ds_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + d3d12_command_list_clear(list, &attachment_desc, NULL, &ds_reference, + dsv_desc->view, dsv_desc->width, dsv_desc->height, dsv_desc->layer_count, + &clear_value, rect_count, rects); +} + +static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList2 *iface, + D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); + struct VkAttachmentDescription attachment_desc; + struct VkAttachmentReference color_reference; + VkClearValue clear_value; + + TRACE("iface %p, rtv %#lx, color %p, rect_count %u, rects %p.\n", + iface, rtv.ptr, color, rect_count, rects); + + d3d12_command_list_track_resource_usage(list, rtv_desc->resource); + + attachment_desc.flags = 0; + attachment_desc.format = rtv_desc->format->vk_format; + attachment_desc.samples = rtv_desc->sample_count; + attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment_desc.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachment_desc.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + color_reference.attachment = 0; + color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + if (rtv_desc->format->type == VKD3D_FORMAT_TYPE_UINT) + { + 
clear_value.color.uint32[0] = max(0, color[0]); + clear_value.color.uint32[1] = max(0, color[1]); + clear_value.color.uint32[2] = max(0, color[2]); + clear_value.color.uint32[3] = max(0, color[3]); + } + else if (rtv_desc->format->type == VKD3D_FORMAT_TYPE_SINT) + { + clear_value.color.int32[0] = color[0]; + clear_value.color.int32[1] = color[1]; + clear_value.color.int32[2] = color[2]; + clear_value.color.int32[3] = color[3]; + } + else + { + clear_value.color.float32[0] = color[0]; + clear_value.color.float32[1] = color[1]; + clear_value.color.float32[2] = color[2]; + clear_value.color.float32[3] = color[3]; + } + + d3d12_command_list_clear(list, &attachment_desc, &color_reference, NULL, + rtv_desc->view, rtv_desc->width, rtv_desc->height, rtv_desc->layer_count, + &clear_value, rect_count, rects); +} + +struct vkd3d_uav_clear_pipeline +{ + VkDescriptorSetLayout vk_set_layout; + VkPipelineLayout vk_pipeline_layout; + VkPipeline vk_pipeline; + VkExtent3D group_size; +}; + +static void vkd3d_uav_clear_state_get_buffer_pipeline(const struct vkd3d_uav_clear_state *state, + enum vkd3d_format_type format_type, struct vkd3d_uav_clear_pipeline *info) +{ + const struct vkd3d_uav_clear_pipelines *pipelines; + + pipelines = format_type == VKD3D_FORMAT_TYPE_UINT ? &state->pipelines_uint : &state->pipelines_float; + info->vk_set_layout = state->vk_set_layout_buffer; + info->vk_pipeline_layout = state->vk_pipeline_layout_buffer; + info->vk_pipeline = pipelines->buffer; + info->group_size = (VkExtent3D){128, 1, 1}; +} + +static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clear_state *state, + VkImageViewType image_view_type, enum vkd3d_format_type format_type, struct vkd3d_uav_clear_pipeline *info) +{ + const struct vkd3d_uav_clear_pipelines *pipelines; + + pipelines = format_type == VKD3D_FORMAT_TYPE_UINT ? 
&state->pipelines_uint : &state->pipelines_float; + info->vk_set_layout = state->vk_set_layout_image; + info->vk_pipeline_layout = state->vk_pipeline_layout_image; + + switch (image_view_type) + { + case VK_IMAGE_VIEW_TYPE_1D: + info->vk_pipeline = pipelines->image_1d; + info->group_size = (VkExtent3D){64, 1, 1}; + break; + + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + info->vk_pipeline = pipelines->image_1d_array; + info->group_size = (VkExtent3D){64, 1, 1}; + break; + + case VK_IMAGE_VIEW_TYPE_2D: + info->vk_pipeline = pipelines->image_2d; + info->group_size = (VkExtent3D){8, 8, 1}; + break; + + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + info->vk_pipeline = pipelines->image_2d_array; + info->group_size = (VkExtent3D){8, 8, 1}; + break; + + case VK_IMAGE_VIEW_TYPE_3D: + info->vk_pipeline = pipelines->image_3d; + info->group_size = (VkExtent3D){8, 8, 1}; + break; + + default: + ERR("Unhandled view type %#x.\n", image_view_type); + info->vk_pipeline = VK_NULL_HANDLE; + info->group_size = (VkExtent3D){0, 0, 0}; + break; + } +} + +static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, + struct d3d12_resource *resource, struct vkd3d_view *view, const VkClearColorValue *clear_colour, + unsigned int rect_count, const D3D12_RECT *rects) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + unsigned int i, miplevel_idx, layer_count; + struct vkd3d_uav_clear_pipeline pipeline; + struct vkd3d_uav_clear_args clear_args; + VkDescriptorImageInfo image_info; + D3D12_RECT full_rect, curr_rect; + VkWriteDescriptorSet write_set; + + d3d12_command_list_track_resource_usage(list, resource); + d3d12_command_list_end_current_render_pass(list); + + d3d12_command_list_invalidate_current_pipeline(list); + d3d12_command_list_invalidate_bindings(list, list->state); + d3d12_command_list_invalidate_root_parameters(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + + if (!d3d12_command_allocator_add_view(list->allocator, view)) + WARN("Failed to add view.\n"); + + 
clear_args.colour = *clear_colour; + + write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_set.pNext = NULL; + write_set.dstBinding = 0; + write_set.dstArrayElement = 0; + write_set.descriptorCount = 1; + + if (d3d12_resource_is_buffer(resource)) + { + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + write_set.pImageInfo = NULL; + write_set.pBufferInfo = NULL; + write_set.pTexelBufferView = &view->u.vk_buffer_view; + + miplevel_idx = 0; + layer_count = 1; + vkd3d_uav_clear_state_get_buffer_pipeline(&list->device->uav_clear_state, + view->format->type, &pipeline); + } + else + { + image_info.sampler = VK_NULL_HANDLE; + image_info.imageView = view->u.vk_image_view; + image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + write_set.pImageInfo = &image_info; + write_set.pBufferInfo = NULL; + write_set.pTexelBufferView = NULL; + + miplevel_idx = view->info.texture.miplevel_idx; + layer_count = view->info.texture.vk_view_type == VK_IMAGE_VIEW_TYPE_3D + ? 
d3d12_resource_desc_get_depth(&resource->desc, miplevel_idx) + : view->info.texture.layer_count; + vkd3d_uav_clear_state_get_image_pipeline(&list->device->uav_clear_state, + view->info.texture.vk_view_type, view->format->type, &pipeline); + } + + if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set( + list->allocator, pipeline.vk_set_layout, 0, false))) + { + ERR("Failed to allocate descriptor set.\n"); + return; + } + + VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &write_set, 0, NULL)); + + full_rect.left = 0; + full_rect.right = d3d12_resource_desc_get_width(&resource->desc, miplevel_idx); + full_rect.top = 0; + full_rect.bottom = d3d12_resource_desc_get_height(&resource->desc, miplevel_idx); + + if (!rect_count) + { + rects = &full_rect; + rect_count = 1; + } + + VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline)); + + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline.vk_pipeline_layout, 0, 1, &write_set.dstSet, 0, NULL)); + + for (i = 0; i < rect_count; ++i) + { + /* Clamp to the actual resource region and skip empty rectangles. 
*/ + curr_rect.left = max(rects[i].left, full_rect.left); + curr_rect.top = max(rects[i].top, full_rect.top); + curr_rect.right = min(rects[i].right, full_rect.right); + curr_rect.bottom = min(rects[i].bottom, full_rect.bottom); + + if (curr_rect.left >= curr_rect.right || curr_rect.top >= curr_rect.bottom) + continue; + + clear_args.offset.x = curr_rect.left; + clear_args.offset.y = curr_rect.top; + clear_args.extent.width = curr_rect.right - curr_rect.left; + clear_args.extent.height = curr_rect.bottom - curr_rect.top; + + VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); + + VK_CALL(vkCmdDispatch(list->vk_command_buffer, + vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width), + vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height), + vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth))); + } +} + +static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12_device *device, + DXGI_FORMAT dxgi_format, VkClearColorValue *colour) +{ + switch (dxgi_format) + { + case DXGI_FORMAT_R11G11B10_FLOAT: + colour->uint32[0] = (colour->uint32[0] & 0x7ff) + | ((colour->uint32[1] & 0x7ff) << 11) + | ((colour->uint32[2] & 0x3ff) << 22); + return vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + + default: + return NULL; + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList2 *iface, + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, + const UINT values[4], UINT rect_count, const D3D12_RECT *rects) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_device *device = list->device; + struct vkd3d_view *view, *uint_view = NULL; + struct vkd3d_texture_view_desc view_desc; + const struct vkd3d_format *uint_format; + struct 
d3d12_resource *resource_impl; + VkClearColorValue colour; + + TRACE("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p.\n", + iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); + + resource_impl = unsafe_impl_from_ID3D12Resource(resource); + view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view; + memcpy(colour.uint32, values, sizeof(colour.uint32)); + + if (view->format->type != VKD3D_FORMAT_TYPE_UINT) + { + if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) + && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, &colour))) + { + ERR("Unhandled format %#x.\n", view->format->dxgi_format); + return; + } + + if (d3d12_resource_is_buffer(resource_impl)) + { + if (!vkd3d_create_buffer_view(device, resource_impl->u.vk_buffer, uint_format, + view->info.buffer.offset, view->info.buffer.size, &uint_view)) + { + ERR("Failed to create buffer view.\n"); + return; + } + } + else + { + memset(&view_desc, 0, sizeof(view_desc)); + view_desc.view_type = view->info.texture.vk_view_type; + view_desc.format = uint_format; + view_desc.miplevel_idx = view->info.texture.miplevel_idx; + view_desc.miplevel_count = 1; + view_desc.layer_idx = view->info.texture.layer_idx; + view_desc.layer_count = view->info.texture.layer_count; + + if (!vkd3d_create_texture_view(device, resource_impl->u.vk_image, &view_desc, &uint_view)) + { + ERR("Failed to create image view.\n"); + return; + } + } + view = uint_view; + } + + d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); + + if (uint_view) + vkd3d_view_decref(uint_view, device); +} + +static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList2 *iface, + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, + const float values[4], UINT rect_count, const D3D12_RECT *rects) +{ + struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_resource *resource_impl; + VkClearColorValue colour; + struct vkd3d_view *view; + + TRACE("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p.\n", + iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); + + resource_impl = unsafe_impl_from_ID3D12Resource(resource); + view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view; + memcpy(colour.float32, values, sizeof(colour.float32)); + + d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); +} + +static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList2 *iface, + ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) +{ + FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); +} + +static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList2 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); + const struct vkd3d_vk_device_procs *vk_procs; + VkQueryControlFlags flags = 0; + + TRACE("iface %p, heap %p, type %#x, index %u.\n", iface, heap, type, index); + + vk_procs = &list->device->vk_procs; + + d3d12_command_list_end_current_render_pass(list); + + VK_CALL(vkCmdResetQueryPool(list->vk_command_buffer, query_heap->vk_query_pool, index, 1)); + + if (type == D3D12_QUERY_TYPE_OCCLUSION) + flags = VK_QUERY_CONTROL_PRECISE_BIT; + + if (D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 <= type && type <= D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3) + { + unsigned int stream_index = type - D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0; + VK_CALL(vkCmdBeginQueryIndexedEXT(list->vk_command_buffer, + query_heap->vk_query_pool, index, flags, stream_index)); + return; + } + + 
VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); +} + +static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList2 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, heap %p, type %#x, index %u.\n", iface, heap, type, index); + + vk_procs = &list->device->vk_procs; + + d3d12_command_list_end_current_render_pass(list); + + d3d12_query_heap_mark_result_as_available(query_heap, index); + + if (type == D3D12_QUERY_TYPE_TIMESTAMP) + { + VK_CALL(vkCmdResetQueryPool(list->vk_command_buffer, query_heap->vk_query_pool, index, 1)); + VK_CALL(vkCmdWriteTimestamp(list->vk_command_buffer, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, query_heap->vk_query_pool, index)); + return; + } + + if (D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 <= type && type <= D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3) + { + unsigned int stream_index = type - D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0; + VK_CALL(vkCmdEndQueryIndexedEXT(list->vk_command_buffer, + query_heap->vk_query_pool, index, stream_index)); + return; + } + + VK_CALL(vkCmdEndQuery(list->vk_command_buffer, query_heap->vk_query_pool, index)); +} + +static size_t get_query_stride(D3D12_QUERY_TYPE type) +{ + if (type == D3D12_QUERY_TYPE_PIPELINE_STATISTICS) + return sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS); + + if (D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 <= type && type <= D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3) + return sizeof(D3D12_QUERY_DATA_SO_STATISTICS); + + return sizeof(uint64_t); +} + +static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList2 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, + ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) 
+{ + const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i, first, count; + VkDeviceSize offset, stride; + + TRACE("iface %p, heap %p, type %#x, start_index %u, query_count %u, " + "dst_buffer %p, aligned_dst_buffer_offset %#"PRIx64".\n", + iface, heap, type, start_index, query_count, + dst_buffer, aligned_dst_buffer_offset); + + vk_procs = &list->device->vk_procs; + + /* Vulkan is less strict than D3D12 here. Vulkan implementations are free + * to return any non-zero result for binary occlusion with at least one + * sample passing, while D3D12 guarantees that the result is 1 then. + * + * For example, the Nvidia binary blob drivers on Linux seem to always + * count precisely, even when it was signalled that non-precise is enough. + */ + if (type == D3D12_QUERY_TYPE_BINARY_OCCLUSION) + FIXME_ONCE("D3D12 guarantees binary occlusion queries result in only 0 and 1.\n"); + + if (!d3d12_resource_is_buffer(buffer)) + { + WARN("Destination resource is not a buffer.\n"); + return; + } + + d3d12_command_list_end_current_render_pass(list); + + stride = get_query_stride(type); + + count = 0; + first = start_index; + offset = aligned_dst_buffer_offset; + for (i = 0; i < query_count; ++i) + { + if (d3d12_query_heap_is_result_available(query_heap, start_index + i)) + { + ++count; + } + else + { + if (count) + { + VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer, + query_heap->vk_query_pool, first, count, buffer->u.vk_buffer, + offset, stride, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT)); + } + count = 0; + first = start_index + i; + offset = aligned_dst_buffer_offset + i * stride; + + /* We cannot copy query results if a query was not issued: + * + * "If the query does not become available in a finite amount of + 
* time (e.g. due to not issuing a query since the last reset), + * a VK_ERROR_DEVICE_LOST error may occur." + */ + VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, + buffer->u.vk_buffer, offset, stride, 0x00000000)); + + ++first; + offset += stride; + } + } + + if (count) + { + VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer, + query_heap->vk_query_pool, first, count, buffer->u.vk_buffer, + offset, stride, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT)); + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList2 *iface, + ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); + const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, buffer %p, aligned_buffer_offset %#"PRIx64", operation %#x.\n", + iface, buffer, aligned_buffer_offset, operation); + + if (!vk_info->EXT_conditional_rendering) + { + FIXME("Vulkan conditional rendering extension not present. Conditional rendering not supported.\n"); + return; + } + + vk_procs = &list->device->vk_procs; + + /* FIXME: Add support for conditional rendering in render passes. 
*/ + d3d12_command_list_end_current_render_pass(list); + + if (resource) + { + VkConditionalRenderingBeginInfoEXT cond_info; + + if (aligned_buffer_offset & (sizeof(uint64_t) - 1)) + { + WARN("Unaligned predicate argument buffer offset %#"PRIx64".\n", aligned_buffer_offset); + return; + } + + if (!d3d12_resource_is_buffer(resource)) + { + WARN("Predicate arguments must be stored in a buffer resource.\n"); + return; + } + + FIXME_ONCE("Predication doesn't support clear and copy commands, " + "and predication values are treated as 32-bit values.\n"); + + cond_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; + cond_info.pNext = NULL; + cond_info.buffer = resource->u.vk_buffer; + cond_info.offset = aligned_buffer_offset; + switch (operation) + { + case D3D12_PREDICATION_OP_EQUAL_ZERO: + cond_info.flags = 0; + break; + + case D3D12_PREDICATION_OP_NOT_EQUAL_ZERO: + cond_info.flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; + break; + + default: + FIXME("Unhandled predication operation %#x.\n", operation); + return; + } + + if (list->is_predicated) + VK_CALL(vkCmdEndConditionalRenderingEXT(list->vk_command_buffer)); + VK_CALL(vkCmdBeginConditionalRenderingEXT(list->vk_command_buffer, &cond_info)); + list->is_predicated = true; + } + else if (list->is_predicated) + { + VK_CALL(vkCmdEndConditionalRenderingEXT(list->vk_command_buffer)); + list->is_predicated = false; + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList2 *iface, + UINT metadata, const void *data, UINT size) +{ + FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); +} + +static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList2 *iface, + UINT metadata, const void *data, UINT size) +{ + FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); +} + +static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList2 *iface) +{ + 
FIXME("iface %p stub!\n", iface); +} + +STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMENTS)); +STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); +STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); + +static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList2 *iface, + ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, + UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) +{ + struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); + struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); + struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; + + TRACE("iface %p, command_signature %p, max_command_count %u, arg_buffer %p, " + "arg_buffer_offset %#"PRIx64", count_buffer %p, count_buffer_offset %#"PRIx64".\n", + iface, command_signature, max_command_count, arg_buffer, arg_buffer_offset, + count_buffer, count_buffer_offset); + + vk_procs = &list->device->vk_procs; + + if (count_buffer && !list->device->vk_info.KHR_draw_indirect_count) + { + FIXME("Count buffers not supported by Vulkan implementation.\n"); + return; + } + + signature_desc = &sig_impl->desc; + for (i = 0; i < signature_desc->NumArgumentDescs; ++i) + { + const D3D12_INDIRECT_ARGUMENT_DESC *arg_desc = &signature_desc->pArgumentDescs[i]; + + switch (arg_desc->Type) + { + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: + if (!d3d12_command_list_begin_render_pass(list)) + { + WARN("Failed to begin render pass, ignoring draw.\n"); + break; + } + + if (count_buffer) + { + 
VK_CALL(vkCmdDrawIndirectCountKHR(list->vk_command_buffer, arg_impl->u.vk_buffer, + arg_buffer_offset, count_impl->u.vk_buffer, count_buffer_offset, + max_command_count, signature_desc->ByteStride)); + } + else + { + VK_CALL(vkCmdDrawIndirect(list->vk_command_buffer, arg_impl->u.vk_buffer, + arg_buffer_offset, max_command_count, signature_desc->ByteStride)); + } + break; + + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: + if (!d3d12_command_list_begin_render_pass(list)) + { + WARN("Failed to begin render pass, ignoring draw.\n"); + break; + } + + d3d12_command_list_check_index_buffer_strip_cut_value(list); + + if (count_buffer) + { + VK_CALL(vkCmdDrawIndexedIndirectCountKHR(list->vk_command_buffer, arg_impl->u.vk_buffer, + arg_buffer_offset, count_impl->u.vk_buffer, count_buffer_offset, + max_command_count, signature_desc->ByteStride)); + } + else + { + VK_CALL(vkCmdDrawIndexedIndirect(list->vk_command_buffer, arg_impl->u.vk_buffer, + arg_buffer_offset, max_command_count, signature_desc->ByteStride)); + } + break; + + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: + if (max_command_count != 1) + FIXME("Ignoring command count %u.\n", max_command_count); + + if (count_buffer) + { + FIXME("Count buffers not supported for indirect dispatch.\n"); + break; + } + + if (!d3d12_command_list_update_compute_state(list)) + { + WARN("Failed to update compute state, ignoring dispatch.\n"); + return; + } + + VK_CALL(vkCmdDispatchIndirect(list->vk_command_buffer, + arg_impl->u.vk_buffer, arg_buffer_offset)); + break; + + default: + FIXME("Ignoring unhandled argument type %#x.\n", arg_desc->Type); + break; + } + } +} + +static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList2 *iface, + ID3D12Resource *dst_buffer, UINT64 dst_offset, + ID3D12Resource *src_buffer, UINT64 src_offset, + UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, + const D3D12_SUBRESOURCE_RANGE_UINT64 *dependent_sub_resource_ranges) +{ + FIXME("iface 
%p, dst_resource %p, dst_offset %#"PRIx64", src_resource %p, " + "src_offset %#"PRIx64", dependent_resource_count %u, " + "dependent_resources %p, dependent_sub_resource_ranges %p stub!\n", + iface, dst_buffer, dst_offset, src_buffer, src_offset, + dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); +} + +static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList2 *iface, + ID3D12Resource *dst_buffer, UINT64 dst_offset, + ID3D12Resource *src_buffer, UINT64 src_offset, + UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, + const D3D12_SUBRESOURCE_RANGE_UINT64 *dependent_sub_resource_ranges) +{ + FIXME("iface %p, dst_resource %p, dst_offset %#"PRIx64", src_resource %p, " + "src_offset %#"PRIx64", dependent_resource_count %u, " + "dependent_resources %p, dependent_sub_resource_ranges %p stub!\n", + iface, dst_buffer, dst_offset, src_buffer, src_offset, + dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); +} + +static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList2 *iface, + FLOAT min, FLOAT max) +{ + FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList2 *iface, + UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) +{ + FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", + iface, sample_count, pixel_count, sample_positions); +} + +static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList2 *iface, + ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, + ID3D12Resource *src_resource, UINT src_sub_resource_idx, + D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) +{ + FIXME("iface %p, dst_resource %p, dst_sub_resource_idx %u, " + "dst_x %u, dst_y %u, src_resource %p, 
src_sub_resource_idx %u, " + "src_rect %p, format %#x, mode %#x stub!\n", + iface, dst_resource, dst_sub_resource_idx, dst_x, dst_y, + src_resource, src_sub_resource_idx, src_rect, format, mode); +} + +static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList2 *iface, UINT mask) +{ + FIXME("iface %p, mask %#x stub!\n", iface, mask); +} + +static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList2 *iface, + UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, + const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) +{ + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_resource *resource; + unsigned int i; + + FIXME("iface %p, count %u, parameters %p, modes %p stub!\n", iface, count, parameters, modes); + + for (i = 0; i < count; ++i) + { + resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, parameters[i].Dest); + d3d12_command_list_track_resource_usage(list, resource); + } +} + +static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = +{ + /* IUnknown methods */ + d3d12_command_list_QueryInterface, + d3d12_command_list_AddRef, + d3d12_command_list_Release, + /* ID3D12Object methods */ + d3d12_command_list_GetPrivateData, + d3d12_command_list_SetPrivateData, + d3d12_command_list_SetPrivateDataInterface, + d3d12_command_list_SetName, + /* ID3D12DeviceChild methods */ + d3d12_command_list_GetDevice, + /* ID3D12CommandList methods */ + d3d12_command_list_GetType, + /* ID3D12GraphicsCommandList methods */ + d3d12_command_list_Close, + d3d12_command_list_Reset, + d3d12_command_list_ClearState, + d3d12_command_list_DrawInstanced, + d3d12_command_list_DrawIndexedInstanced, + d3d12_command_list_Dispatch, + d3d12_command_list_CopyBufferRegion, + d3d12_command_list_CopyTextureRegion, + d3d12_command_list_CopyResource, + d3d12_command_list_CopyTiles, + d3d12_command_list_ResolveSubresource, + 
d3d12_command_list_IASetPrimitiveTopology, + d3d12_command_list_RSSetViewports, + d3d12_command_list_RSSetScissorRects, + d3d12_command_list_OMSetBlendFactor, + d3d12_command_list_OMSetStencilRef, + d3d12_command_list_SetPipelineState, + d3d12_command_list_ResourceBarrier, + d3d12_command_list_ExecuteBundle, + d3d12_command_list_SetDescriptorHeaps, + d3d12_command_list_SetComputeRootSignature, + d3d12_command_list_SetGraphicsRootSignature, + d3d12_command_list_SetComputeRootDescriptorTable, + d3d12_command_list_SetGraphicsRootDescriptorTable, + d3d12_command_list_SetComputeRoot32BitConstant, + d3d12_command_list_SetGraphicsRoot32BitConstant, + d3d12_command_list_SetComputeRoot32BitConstants, + d3d12_command_list_SetGraphicsRoot32BitConstants, + d3d12_command_list_SetComputeRootConstantBufferView, + d3d12_command_list_SetGraphicsRootConstantBufferView, + d3d12_command_list_SetComputeRootShaderResourceView, + d3d12_command_list_SetGraphicsRootShaderResourceView, + d3d12_command_list_SetComputeRootUnorderedAccessView, + d3d12_command_list_SetGraphicsRootUnorderedAccessView, + d3d12_command_list_IASetIndexBuffer, + d3d12_command_list_IASetVertexBuffers, + d3d12_command_list_SOSetTargets, + d3d12_command_list_OMSetRenderTargets, + d3d12_command_list_ClearDepthStencilView, + d3d12_command_list_ClearRenderTargetView, + d3d12_command_list_ClearUnorderedAccessViewUint, + d3d12_command_list_ClearUnorderedAccessViewFloat, + d3d12_command_list_DiscardResource, + d3d12_command_list_BeginQuery, + d3d12_command_list_EndQuery, + d3d12_command_list_ResolveQueryData, + d3d12_command_list_SetPredication, + d3d12_command_list_SetMarker, + d3d12_command_list_BeginEvent, + d3d12_command_list_EndEvent, + d3d12_command_list_ExecuteIndirect, + /* ID3D12GraphicsCommandList1 methods */ + d3d12_command_list_AtomicCopyBufferUINT, + d3d12_command_list_AtomicCopyBufferUINT64, + d3d12_command_list_OMSetDepthBounds, + d3d12_command_list_SetSamplePositions, + 
d3d12_command_list_ResolveSubresourceRegion, + d3d12_command_list_SetViewInstanceMask, + /* ID3D12GraphicsCommandList2 methods */ + d3d12_command_list_WriteBufferImmediate, +}; + +static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); + return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); +} + +static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, + D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_allocator *allocator, + ID3D12PipelineState *initial_pipeline_state) +{ + HRESULT hr; + + list->ID3D12GraphicsCommandList2_iface.lpVtbl = &d3d12_command_list_vtbl; + list->refcount = 1; + + list->type = type; + + if (FAILED(hr = vkd3d_private_store_init(&list->private_store))) + return hr; + + d3d12_device_add_ref(list->device = device); + + list->allocator = allocator; + + if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) + { + list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS].vk_uav_counter_views = NULL; + list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE].vk_uav_counter_views = NULL; + d3d12_command_list_reset_state(list, initial_pipeline_state); + } + else + { + vkd3d_private_store_destroy(&list->private_store); + d3d12_device_release(device); + } + + return hr; +} + +HRESULT d3d12_command_list_create(struct d3d12_device *device, + UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *allocator_iface, + ID3D12PipelineState *initial_pipeline_state, struct d3d12_command_list **list) +{ + struct d3d12_command_allocator *allocator; + struct d3d12_command_list *object; + HRESULT hr; + + if (!(allocator = unsafe_impl_from_ID3D12CommandAllocator(allocator_iface))) + { + WARN("Command allocator is NULL.\n"); + return E_INVALIDARG; + } + + if (allocator->type != 
type) + { + WARN("Command list types do not match (allocator %#x, list %#x).\n", + allocator->type, type); + return E_INVALIDARG; + } + + debug_ignored_node_mask(node_mask); + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_command_list_init(object, device, type, allocator, initial_pipeline_state))) + { + vkd3d_free(object); + return hr; + } + + TRACE("Created command list %p.\n", object); + + *list = object; + + return S_OK; +} + +/* ID3D12CommandQueue */ +static inline struct d3d12_command_queue *impl_from_ID3D12CommandQueue(ID3D12CommandQueue *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueue_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface, + REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + if (IsEqualGUID(riid, &IID_ID3D12CommandQueue) + || IsEqualGUID(riid, &IID_ID3D12Pageable) + || IsEqualGUID(riid, &IID_ID3D12DeviceChild) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { + ID3D12CommandQueue_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + ULONG refcount = InterlockedIncrement(&command_queue->refcount); + + TRACE("%p increasing refcount to %u.\n", command_queue, refcount); + + return refcount; +} + +static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + ULONG refcount = InterlockedDecrement(&command_queue->refcount); + + TRACE("%p decreasing refcount to %u.\n", command_queue, refcount); + + 
if (!refcount) + { + struct d3d12_device *device = command_queue->device; + + vkd3d_private_store_destroy(&command_queue->private_store); + + vkd3d_free(command_queue); + + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetPrivateData(ID3D12CommandQueue *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&command_queue->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_SetPrivateData(ID3D12CommandQueue *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&command_queue->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_SetPrivateDataInterface(ID3D12CommandQueue *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&command_queue->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_SetName(ID3D12CommandQueue *iface, const WCHAR *name) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + VkQueue vk_queue; + HRESULT hr; + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, command_queue->device->wchar_size)); + + if (!(vk_queue = vkd3d_queue_acquire(command_queue->vkd3d_queue))) + { + ERR("Failed to acquire queue %p.\n", command_queue->vkd3d_queue); + return E_FAIL; + } + + hr = 
vkd3d_set_vk_object_name(command_queue->device, (uint64_t)(uintptr_t)vk_queue, + VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT, name); + vkd3d_queue_release(command_queue->vkd3d_queue); + return hr; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetDevice(ID3D12CommandQueue *iface, REFIID iid, void **device) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(command_queue->device, iid, device); +} + +static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, + ID3D12Resource *resource, UINT region_count, + const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, + const D3D12_TILE_REGION_SIZE *region_sizes, + UINT range_count, + const D3D12_TILE_RANGE_FLAGS *range_flags, + UINT *heap_range_offsets, + UINT *range_tile_counts, + D3D12_TILE_MAPPING_FLAGS flags) +{ + FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " + "region_sizes %p, range_count %u, range_flags %p, heap_range_offsets %p, " + "range_tile_counts %p, flags %#x stub!\n", + iface, resource, region_count, region_start_coordinates, region_sizes, range_count, + range_flags, heap_range_offsets, range_tile_counts, flags); +} + +static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, + ID3D12Resource *dst_resource, + const D3D12_TILED_RESOURCE_COORDINATE *dst_region_start_coordinate, + ID3D12Resource *src_resource, + const D3D12_TILED_RESOURCE_COORDINATE *src_region_start_coordinate, + const D3D12_TILE_REGION_SIZE *region_size, + D3D12_TILE_MAPPING_FLAGS flags) +{ + FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " + "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", + iface, dst_resource, dst_region_start_coordinate, src_resource, + src_region_start_coordinate, region_size, flags); +} + 
+static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12CommandQueue *iface, + UINT command_list_count, ID3D12CommandList * const *command_lists) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_command_list *cmd_list; + struct VkSubmitInfo submit_desc; + VkCommandBuffer *buffers; + VkQueue vk_queue; + unsigned int i; + VkResult vr; + + TRACE("iface %p, command_list_count %u, command_lists %p.\n", + iface, command_list_count, command_lists); + + vk_procs = &command_queue->device->vk_procs; + + if (!(buffers = vkd3d_calloc(command_list_count, sizeof(*buffers)))) + { + ERR("Failed to allocate command buffer array.\n"); + return; + } + + for (i = 0; i < command_list_count; ++i) + { + cmd_list = unsafe_impl_from_ID3D12CommandList(command_lists[i]); + + if (cmd_list->is_recording) + { + d3d12_device_mark_as_removed(command_queue->device, DXGI_ERROR_INVALID_CALL, + "Command list %p is in recording state.", command_lists[i]); + vkd3d_free(buffers); + return; + } + + buffers[i] = cmd_list->vk_command_buffer; + } + + submit_desc.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_desc.pNext = NULL; + submit_desc.waitSemaphoreCount = 0; + submit_desc.pWaitSemaphores = NULL; + submit_desc.pWaitDstStageMask = NULL; + submit_desc.commandBufferCount = command_list_count; + submit_desc.pCommandBuffers = buffers; + submit_desc.signalSemaphoreCount = 0; + submit_desc.pSignalSemaphores = NULL; + + if (!(vk_queue = vkd3d_queue_acquire(command_queue->vkd3d_queue))) + { + ERR("Failed to acquire queue %p.\n", command_queue->vkd3d_queue); + vkd3d_free(buffers); + return; + } + + if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_desc, VK_NULL_HANDLE))) < 0) + ERR("Failed to submit queue(s), vr %d.\n", vr); + + vkd3d_queue_release(command_queue->vkd3d_queue); + + vkd3d_free(buffers); +} + +static void STDMETHODCALLTYPE d3d12_command_queue_SetMarker(ID3D12CommandQueue 
*iface, + UINT metadata, const void *data, UINT size) +{ + FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", + iface, metadata, data, size); +} + +static void STDMETHODCALLTYPE d3d12_command_queue_BeginEvent(ID3D12CommandQueue *iface, + UINT metadata, const void *data, UINT size) +{ + FIXME("iface %p, metatdata %#x, data %p, size %u stub!\n", + iface, metadata, data, size); +} + +static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *iface) +{ + FIXME("iface %p stub!\n", iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, + ID3D12Fence *fence_iface, UINT64 value) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + const struct vkd3d_vk_device_procs *vk_procs; + VkSemaphore vk_semaphore = VK_NULL_HANDLE; + VkFence vk_fence = VK_NULL_HANDLE; + struct vkd3d_queue *vkd3d_queue; + struct d3d12_device *device; + struct d3d12_fence *fence; + VkSubmitInfo submit_info; + uint64_t sequence_number; + VkQueue vk_queue; + VkResult vr; + HRESULT hr; + + TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); + + device = command_queue->device; + vk_procs = &device->vk_procs; + vkd3d_queue = command_queue->vkd3d_queue; + + fence = unsafe_impl_from_ID3D12Fence(fence_iface); + + if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0) + { + WARN("Failed to create Vulkan fence, vr %d.\n", vr); + goto fail_vkresult; + } + + if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue))) + { + ERR("Failed to acquire queue %p.\n", vkd3d_queue); + hr = E_FAIL; + goto fail; + } + + if ((vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, device, &vk_semaphore)) < 0) + { + ERR("Failed to create Vulkan semaphore, vr %d.\n", vr); + vk_semaphore = VK_NULL_HANDLE; + } + + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = NULL; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = NULL; + submit_info.pWaitDstStageMask = 
NULL; + submit_info.commandBufferCount = 0; + submit_info.pCommandBuffers = NULL; + submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0; + submit_info.pSignalSemaphores = &vk_semaphore; + + if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0) + { + sequence_number = ++vkd3d_queue->submitted_sequence_number; + + /* We don't expect to overrun the 64-bit counter, but we handle it gracefully anyway. */ + if (!sequence_number) + sequence_number = vkd3d_queue_reset_sequence_number_locked(vkd3d_queue); + } + + vkd3d_queue_release(vkd3d_queue); + + if (vr < 0) + { + WARN("Failed to submit signal operation, vr %d.\n", vr); + goto fail_vkresult; + } + + if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value))) + vk_semaphore = VK_NULL_HANDLE; + + vr = VK_CALL(vkGetFenceStatus(device->vk_device, vk_fence)); + if (vr == VK_NOT_READY) + { + if (SUCCEEDED(hr = vkd3d_enqueue_gpu_fence(&device->fence_worker, vk_fence, fence, value, vkd3d_queue, sequence_number))) + vk_fence = VK_NULL_HANDLE; + } + else if (vr == VK_SUCCESS) + { + TRACE("Already signaled %p, value %#"PRIx64".\n", fence, value); + hr = d3d12_fence_signal(fence, value, vk_fence); + vk_fence = VK_NULL_HANDLE; + vkd3d_queue_update_sequence_number(vkd3d_queue, sequence_number, device); + } + else + { + FIXME("Failed to get fence status, vr %d.\n", vr); + hr = hresult_from_vk_result(vr); + } + + if (vk_fence || vk_semaphore) + { + /* In case of an unexpected failure, try to safely destroy Vulkan objects. 
*/ + vkd3d_queue_wait_idle(vkd3d_queue, vk_procs); + goto fail; + } + + return hr; + +fail_vkresult: + hr = hresult_from_vk_result(vr); +fail: + VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL)); + VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL)); + return hr; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, + ID3D12Fence *fence_iface, UINT64 value) +{ + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + const struct vkd3d_vk_device_procs *vk_procs; + struct vkd3d_signaled_semaphore *semaphore; + uint64_t completed_value = 0; + struct vkd3d_queue *queue; + struct d3d12_fence *fence; + VkSubmitInfo submit_info; + VkQueue vk_queue; + VkResult vr; + HRESULT hr; + + TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); + + vk_procs = &command_queue->device->vk_procs; + queue = command_queue->vkd3d_queue; + + fence = unsafe_impl_from_ID3D12Fence(fence_iface); + + semaphore = d3d12_fence_acquire_vk_semaphore(fence, value, &completed_value); + if (!semaphore && completed_value >= value) + { + /* We don't get a Vulkan semaphore if the fence was signaled on CPU. 
*/ + TRACE("Already signaled %p, value %#"PRIx64".\n", fence, completed_value); + return S_OK; + } + + if (!(vk_queue = vkd3d_queue_acquire(queue))) + { + ERR("Failed to acquire queue %p.\n", queue); + hr = E_FAIL; + goto fail; + } + + if (!semaphore) + { + if (command_queue->last_waited_fence == fence && command_queue->last_waited_fence_value >= value) + { + WARN("Already waited on fence %p, value %#"PRIx64".\n", fence, value); + } + else + { + FIXME("Failed to acquire Vulkan semaphore for fence %p, value %#"PRIx64 + ", completed value %#"PRIx64".\n", fence, value, completed_value); + } + + vkd3d_queue_release(queue); + return S_OK; + } + + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = NULL; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &semaphore->vk_semaphore; + submit_info.pWaitDstStageMask = &wait_stage_mask; + submit_info.commandBufferCount = 0; + submit_info.pCommandBuffers = NULL; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = NULL; + + if (!vkd3d_array_reserve((void **)&queue->semaphores, &queue->semaphores_size, + queue->semaphore_count + 1, sizeof(*queue->semaphores))) + { + ERR("Failed to allocate memory for semaphore.\n"); + vkd3d_queue_release(queue); + hr = E_OUTOFMEMORY; + goto fail; + } + + if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE))) >= 0) + { + queue->semaphores[queue->semaphore_count].vk_semaphore = semaphore->vk_semaphore; + queue->semaphores[queue->semaphore_count].sequence_number = queue->submitted_sequence_number + 1; + ++queue->semaphore_count; + + command_queue->last_waited_fence = fence; + command_queue->last_waited_fence_value = value; + } + + vkd3d_queue_release(queue); + + if (vr < 0) + { + WARN("Failed to submit wait operation, vr %d.\n", vr); + hr = hresult_from_vk_result(vr); + goto fail; + } + + d3d12_fence_remove_vk_semaphore(fence, semaphore); + return S_OK; + +fail: + d3d12_fence_release_vk_semaphore(fence, semaphore); + 
return hr; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface, + UINT64 *frequency) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct d3d12_device *device = command_queue->device; + + TRACE("iface %p, frequency %p.\n", iface, frequency); + + if (!command_queue->vkd3d_queue->timestamp_bits) + { + WARN("Timestamp queries not supported.\n"); + return E_FAIL; + } + + *frequency = 1000000000 / device->vk_info.device_limits.timestampPeriod; + + return S_OK; +} + +#define NANOSECONDS_IN_A_SECOND 1000000000 + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetClockCalibration(ID3D12CommandQueue *iface, + UINT64 *gpu_timestamp, UINT64 *cpu_timestamp) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct d3d12_device *device = command_queue->device; + const struct vkd3d_vk_device_procs *vk_procs; + VkCalibratedTimestampInfoEXT infos[2]; + uint64_t timestamps[2]; + uint64_t deviations[2]; + VkResult vr; + + TRACE("iface %p, gpu_timestamp %p, cpu_timestamp %p.\n", + iface, gpu_timestamp, cpu_timestamp); + + if (!command_queue->vkd3d_queue->timestamp_bits) + { + WARN("Timestamp queries not supported.\n"); + return E_FAIL; + } + + if (!gpu_timestamp || !cpu_timestamp) + return E_INVALIDARG; + + if (!device->vk_info.EXT_calibrated_timestamps || device->vk_host_time_domain == -1) + { + WARN(!device->vk_info.EXT_calibrated_timestamps + ? "VK_EXT_calibrated_timestamps was not found. Setting timestamps to zero.\n" + : "Device and/or host time domain is not available. 
Setting timestamps to zero.\n"); + *gpu_timestamp = 0; + *cpu_timestamp = 0; + return S_OK; + } + + vk_procs = &device->vk_procs; + + infos[0].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + infos[0].pNext = NULL; + infos[0].timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; + infos[1].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + infos[1].pNext = NULL; + infos[1].timeDomain = device->vk_host_time_domain; + + if ((vr = VK_CALL(vkGetCalibratedTimestampsEXT(command_queue->device->vk_device, + ARRAY_SIZE(infos), infos, timestamps, deviations))) < 0) + { + WARN("Failed to get calibrated timestamps, vr %d.\n", vr); + return E_FAIL; + } + + if (infos[1].timeDomain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT + || infos[1].timeDomain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT) + { + /* Convert monotonic clock to match Wine's RtlQueryPerformanceFrequency(). */ + timestamps[1] /= NANOSECONDS_IN_A_SECOND / device->vkd3d_instance->host_ticks_per_second; + } + + *gpu_timestamp = timestamps[0]; + *cpu_timestamp = timestamps[1]; + + return S_OK; +} + +static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc(ID3D12CommandQueue *iface, + D3D12_COMMAND_QUEUE_DESC *desc) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + + TRACE("iface %p, desc %p.\n", iface, desc); + + *desc = command_queue->desc; + return desc; +} + +static const struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl = +{ + /* IUnknown methods */ + d3d12_command_queue_QueryInterface, + d3d12_command_queue_AddRef, + d3d12_command_queue_Release, + /* ID3D12Object methods */ + d3d12_command_queue_GetPrivateData, + d3d12_command_queue_SetPrivateData, + d3d12_command_queue_SetPrivateDataInterface, + d3d12_command_queue_SetName, + /* ID3D12DeviceChild methods */ + d3d12_command_queue_GetDevice, + /* ID3D12CommandQueue methods */ + d3d12_command_queue_UpdateTileMappings, + d3d12_command_queue_CopyTileMappings, + d3d12_command_queue_ExecuteCommandLists, + 
d3d12_command_queue_SetMarker, + d3d12_command_queue_BeginEvent, + d3d12_command_queue_EndEvent, + d3d12_command_queue_Signal, + d3d12_command_queue_Wait, + d3d12_command_queue_GetTimestampFrequency, + d3d12_command_queue_GetClockCalibration, + d3d12_command_queue_GetDesc, +}; + +static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, + struct d3d12_device *device, const D3D12_COMMAND_QUEUE_DESC *desc) +{ + HRESULT hr; + + queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl; + queue->refcount = 1; + + queue->desc = *desc; + if (!queue->desc.NodeMask) + queue->desc.NodeMask = 0x1; + + if (!(queue->vkd3d_queue = d3d12_device_get_vkd3d_queue(device, desc->Type))) + return E_NOTIMPL; + + queue->last_waited_fence = NULL; + queue->last_waited_fence_value = 0; + + if (desc->Priority == D3D12_COMMAND_QUEUE_PRIORITY_GLOBAL_REALTIME) + { + FIXME("Global realtime priority is not implemented.\n"); + return E_NOTIMPL; + } + + if (desc->Priority) + FIXME("Ignoring priority %#x.\n", desc->Priority); + if (desc->Flags) + FIXME("Ignoring flags %#x.\n", desc->Flags); + + if (FAILED(hr = vkd3d_private_store_init(&queue->private_store))) + return hr; + + d3d12_device_add_ref(queue->device = device); + + return S_OK; +} + +HRESULT d3d12_command_queue_create(struct d3d12_device *device, + const D3D12_COMMAND_QUEUE_DESC *desc, struct d3d12_command_queue **queue) +{ + struct d3d12_command_queue *object; + HRESULT hr; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_command_queue_init(object, device, desc))) + { + vkd3d_free(object); + return hr; + } + + TRACE("Created command queue %p.\n", object); + + *queue = object; + + return S_OK; +} + +uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue) +{ + struct d3d12_command_queue *d3d12_queue = impl_from_ID3D12CommandQueue(queue); + + return d3d12_queue->vkd3d_queue->vk_family_index; +} + +VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue 
*queue) +{ + struct d3d12_command_queue *d3d12_queue = impl_from_ID3D12CommandQueue(queue); + + return vkd3d_queue_acquire(d3d12_queue->vkd3d_queue); +} + +void vkd3d_release_vk_queue(ID3D12CommandQueue *queue) +{ + struct d3d12_command_queue *d3d12_queue = impl_from_ID3D12CommandQueue(queue); + + return vkd3d_queue_release(d3d12_queue->vkd3d_queue); +} + +/* ID3D12CommandSignature */ +static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_command_signature, ID3D12CommandSignature_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_signature_QueryInterface(ID3D12CommandSignature *iface, + REFIID iid, void **out) +{ + TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); + + if (IsEqualGUID(iid, &IID_ID3D12CommandSignature) + || IsEqualGUID(iid, &IID_ID3D12Pageable) + || IsEqualGUID(iid, &IID_ID3D12DeviceChild) + || IsEqualGUID(iid, &IID_ID3D12Object) + || IsEqualGUID(iid, &IID_IUnknown)) + { + ID3D12CommandSignature_AddRef(iface); + *out = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); + + *out = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_command_signature_AddRef(ID3D12CommandSignature *iface) +{ + struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); + ULONG refcount = InterlockedIncrement(&signature->refcount); + + TRACE("%p increasing refcount to %u.\n", signature, refcount); + + return refcount; +} + +static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSignature *iface) +{ + struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); + ULONG refcount = InterlockedDecrement(&signature->refcount); + + TRACE("%p decreasing refcount to %u.\n", signature, refcount); + + if (!refcount) + { + struct d3d12_device *device = signature->device; + + 
vkd3d_private_store_destroy(&signature->private_store); + + vkd3d_free((void *)signature->desc.pArgumentDescs); + vkd3d_free(signature); + + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_signature_GetPrivateData(ID3D12CommandSignature *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&signature->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_signature_SetPrivateData(ID3D12CommandSignature *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&signature->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_signature_SetPrivateDataInterface(ID3D12CommandSignature *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&signature->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_signature_SetName(ID3D12CommandSignature *iface, const WCHAR *name) +{ + struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, signature->device->wchar_size)); + + return name ? 
S_OK : E_INVALIDARG; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_signature_GetDevice(ID3D12CommandSignature *iface, REFIID iid, void **device) +{ + struct d3d12_command_signature *signature = impl_from_ID3D12CommandSignature(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(signature->device, iid, device); +} + +static const struct ID3D12CommandSignatureVtbl d3d12_command_signature_vtbl = +{ + /* IUnknown methods */ + d3d12_command_signature_QueryInterface, + d3d12_command_signature_AddRef, + d3d12_command_signature_Release, + /* ID3D12Object methods */ + d3d12_command_signature_GetPrivateData, + d3d12_command_signature_SetPrivateData, + d3d12_command_signature_SetPrivateDataInterface, + d3d12_command_signature_SetName, + /* ID3D12DeviceChild methods */ + d3d12_command_signature_GetDevice, +}; + +struct d3d12_command_signature *unsafe_impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_command_signature_vtbl); + return CONTAINING_RECORD(iface, struct d3d12_command_signature, ID3D12CommandSignature_iface); +} + +HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_COMMAND_SIGNATURE_DESC *desc, + struct d3d12_command_signature **signature) +{ + struct d3d12_command_signature *object; + unsigned int i; + HRESULT hr; + + for (i = 0; i < desc->NumArgumentDescs; ++i) + { + const D3D12_INDIRECT_ARGUMENT_DESC *argument_desc = &desc->pArgumentDescs[i]; + switch (argument_desc->Type) + { + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: + if (i != desc->NumArgumentDescs - 1) + { + WARN("Draw/dispatch must be the last element of a command signature.\n"); + return E_INVALIDARG; + } + break; + default: + break; + } + } + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + 
object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl; + object->refcount = 1; + + object->desc = *desc; + if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs)))) + { + vkd3d_free(object); + return E_OUTOFMEMORY; + } + memcpy((void *)object->desc.pArgumentDescs, desc->pArgumentDescs, + desc->NumArgumentDescs * sizeof(*desc->pArgumentDescs)); + + if (FAILED(hr = vkd3d_private_store_init(&object->private_store))) + { + vkd3d_free((void *)object->desc.pArgumentDescs); + vkd3d_free(object); + return hr; + } + + d3d12_device_add_ref(object->device = device); + + TRACE("Created command signature %p.\n", object); + + *signature = object; + + return S_OK; +} diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c new file mode 100644 index 00000000000..59fa9af9b31 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -0,0 +1,4192 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_private.h" +#include "vkd3d_version.h" + +#ifdef HAVE_DLFCN_H +#include + +static void *vkd3d_dlopen(const char *name) +{ + return dlopen(name, RTLD_NOW); +} + +static void *vkd3d_dlsym(void *handle, const char *symbol) +{ + return dlsym(handle, symbol); +} + +static int vkd3d_dlclose(void *handle) +{ + return dlclose(handle); +} + +static const char *vkd3d_dlerror(void) +{ + return dlerror(); +} +#else +static void *vkd3d_dlopen(const char *name) +{ + FIXME("Not implemented for this platform.\n"); + return NULL; +} + +static void *vkd3d_dlsym(void *handle, const char *symbol) +{ + return NULL; +} + +static int vkd3d_dlclose(void *handle) +{ + return 0; +} + +static const char *vkd3d_dlerror(void) +{ + return "Not implemented for this platform.\n"; +} +#endif + +struct vkd3d_struct +{ + enum vkd3d_structure_type type; + const void *next; +}; + +#define vkd3d_find_struct(c, t) vkd3d_find_struct_(c, VKD3D_STRUCTURE_TYPE_##t) +static const void *vkd3d_find_struct_(const struct vkd3d_struct *chain, + enum vkd3d_structure_type type) +{ + while (chain) + { + if (chain->type == type) + return chain; + + chain = chain->next; + } + + return NULL; +} + +static uint32_t vkd3d_get_vk_version(void) +{ + int major, minor; + + vkd3d_parse_version(PACKAGE_VERSION, &major, &minor); + return VK_MAKE_VERSION(major, minor, 0); +} + +struct vkd3d_optional_extension_info +{ + const char *extension_name; + ptrdiff_t vulkan_info_offset; + bool is_debug_only; +}; + +#define VK_EXTENSION(name, member) \ + {VK_ ## name ## _EXTENSION_NAME, offsetof(struct vkd3d_vulkan_info, member)} +#define VK_DEBUG_EXTENSION(name, member) \ + {VK_ ## name ## _EXTENSION_NAME, offsetof(struct vkd3d_vulkan_info, member), true} + +static const struct 
vkd3d_optional_extension_info optional_instance_extensions[] = +{ + /* KHR extensions */ + VK_EXTENSION(KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2, KHR_get_physical_device_properties2), + /* EXT extensions */ + VK_DEBUG_EXTENSION(EXT_DEBUG_REPORT, EXT_debug_report), +}; + +static const char * const required_device_extensions[] = +{ + VK_KHR_MAINTENANCE1_EXTENSION_NAME, + VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, +}; + +static const struct vkd3d_optional_extension_info optional_device_extensions[] = +{ + /* KHR extensions */ + VK_EXTENSION(KHR_DEDICATED_ALLOCATION, KHR_dedicated_allocation), + VK_EXTENSION(KHR_DRAW_INDIRECT_COUNT, KHR_draw_indirect_count), + VK_EXTENSION(KHR_GET_MEMORY_REQUIREMENTS_2, KHR_get_memory_requirements2), + VK_EXTENSION(KHR_IMAGE_FORMAT_LIST, KHR_image_format_list), + VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), + VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), + VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), + /* EXT extensions */ + VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), + VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), + VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), + VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), + VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), + VK_EXTENSION(EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_shader_demote_to_helper_invocation), + VK_EXTENSION(EXT_SHADER_STENCIL_EXPORT, EXT_shader_stencil_export), + VK_EXTENSION(EXT_TEXEL_BUFFER_ALIGNMENT, EXT_texel_buffer_alignment), + VK_EXTENSION(EXT_TRANSFORM_FEEDBACK, EXT_transform_feedback), + VK_EXTENSION(EXT_VERTEX_ATTRIBUTE_DIVISOR, EXT_vertex_attribute_divisor), +}; + +static unsigned int get_spec_version(const VkExtensionProperties *extensions, + unsigned int count, const char *extension_name) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + { + if (!strcmp(extensions[i].extensionName, extension_name)) + return 
extensions[i].specVersion; + } + return 0; +} + +static bool is_extension_disabled(const char *extension_name) +{ + const char *disabled_extensions; + + if (!(disabled_extensions = getenv("VKD3D_DISABLE_EXTENSIONS"))) + return false; + + return vkd3d_debug_list_has_member(disabled_extensions, extension_name); +} + +static bool has_extension(const VkExtensionProperties *extensions, + unsigned int count, const char *extension_name) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + { + if (is_extension_disabled(extension_name)) + { + WARN("Extension %s is disabled.\n", debugstr_a(extension_name)); + continue; + } + if (!strcmp(extensions[i].extensionName, extension_name)) + return true; + } + return false; +} + +static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensions, unsigned int count, + const char * const *required_extensions, unsigned int required_extension_count, + const struct vkd3d_optional_extension_info *optional_extensions, unsigned int optional_extension_count, + const char * const *user_extensions, unsigned int user_extension_count, + const char * const *optional_user_extensions, unsigned int optional_user_extension_count, + bool *user_extension_supported, struct vkd3d_vulkan_info *vulkan_info, const char *extension_type, + bool is_debug_enabled) +{ + unsigned int extension_count = 0; + unsigned int i; + + for (i = 0; i < required_extension_count; ++i) + { + if (!has_extension(extensions, count, required_extensions[i])) + ERR("Required %s extension %s is not supported.\n", + extension_type, debugstr_a(required_extensions[i])); + ++extension_count; + } + + for (i = 0; i < optional_extension_count; ++i) + { + const char *extension_name = optional_extensions[i].extension_name; + ptrdiff_t offset = optional_extensions[i].vulkan_info_offset; + bool *supported = (void *)((uintptr_t)vulkan_info + offset); + + if (!is_debug_enabled && optional_extensions[i].is_debug_only) + { + *supported = false; + TRACE("Skipping debug-only 
extension %s.\n", debugstr_a(extension_name)); + continue; + } + + if ((*supported = has_extension(extensions, count, extension_name))) + { + TRACE("Found %s extension.\n", debugstr_a(extension_name)); + ++extension_count; + } + } + + for (i = 0; i < user_extension_count; ++i) + { + if (!has_extension(extensions, count, user_extensions[i])) + ERR("Required user %s extension %s is not supported.\n", + extension_type, debugstr_a(user_extensions[i])); + ++extension_count; + } + + assert(!optional_user_extension_count || user_extension_supported); + for (i = 0; i < optional_user_extension_count; ++i) + { + if (has_extension(extensions, count, optional_user_extensions[i])) + { + user_extension_supported[i] = true; + ++extension_count; + } + else + { + user_extension_supported[i] = false; + WARN("Optional user %s extension %s is not supported.\n", + extension_type, debugstr_a(optional_user_extensions[i])); + } + } + + return extension_count; +} + +static unsigned int vkd3d_append_extension(const char *extensions[], + unsigned int extension_count, const char *extension_name) +{ + unsigned int i; + + /* avoid duplicates */ + for (i = 0; i < extension_count; ++i) + { + if (!strcmp(extensions[i], extension_name)) + return extension_count; + } + + extensions[extension_count++] = extension_name; + return extension_count; +} + +static unsigned int vkd3d_enable_extensions(const char *extensions[], + const char * const *required_extensions, unsigned int required_extension_count, + const struct vkd3d_optional_extension_info *optional_extensions, unsigned int optional_extension_count, + const char * const *user_extensions, unsigned int user_extension_count, + const char * const *optional_user_extensions, unsigned int optional_user_extension_count, + bool *user_extension_supported, const struct vkd3d_vulkan_info *vulkan_info) +{ + unsigned int extension_count = 0; + unsigned int i; + + for (i = 0; i < required_extension_count; ++i) + { + extensions[extension_count++] = 
required_extensions[i]; + } + for (i = 0; i < optional_extension_count; ++i) + { + ptrdiff_t offset = optional_extensions[i].vulkan_info_offset; + const bool *supported = (void *)((uintptr_t)vulkan_info + offset); + + if (*supported) + extensions[extension_count++] = optional_extensions[i].extension_name; + } + + for (i = 0; i < user_extension_count; ++i) + { + extension_count = vkd3d_append_extension(extensions, extension_count, user_extensions[i]); + } + assert(!optional_user_extension_count || user_extension_supported); + for (i = 0; i < optional_user_extension_count; ++i) + { + if (!user_extension_supported[i]) + continue; + extension_count = vkd3d_append_extension(extensions, extension_count, optional_user_extensions[i]); + } + + return extension_count; +} + +static HRESULT vkd3d_init_instance_caps(struct vkd3d_instance *instance, + const struct vkd3d_instance_create_info *create_info, + uint32_t *instance_extension_count, bool **user_extension_supported) +{ + const struct vkd3d_vk_global_procs *vk_procs = &instance->vk_global_procs; + const struct vkd3d_optional_instance_extensions_info *optional_extensions; + struct vkd3d_vulkan_info *vulkan_info = &instance->vk_info; + VkExtensionProperties *vk_extensions; + uint32_t count; + VkResult vr; + + memset(vulkan_info, 0, sizeof(*vulkan_info)); + *instance_extension_count = 0; + + if ((vr = vk_procs->vkEnumerateInstanceExtensionProperties(NULL, &count, NULL)) < 0) + { + ERR("Failed to enumerate instance extensions, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + if (!count) + return S_OK; + + if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions)))) + return E_OUTOFMEMORY; + + TRACE("Enumerating %u instance extensions.\n", count); + if ((vr = vk_procs->vkEnumerateInstanceExtensionProperties(NULL, &count, vk_extensions)) < 0) + { + ERR("Failed to enumerate instance extensions, vr %d.\n", vr); + vkd3d_free(vk_extensions); + return hresult_from_vk_result(vr); + } + + optional_extensions = 
vkd3d_find_struct(create_info->next, OPTIONAL_INSTANCE_EXTENSIONS_INFO); + if (optional_extensions && optional_extensions->extension_count) + { + if (!(*user_extension_supported = vkd3d_calloc(optional_extensions->extension_count, sizeof(bool)))) + { + vkd3d_free(vk_extensions); + return E_OUTOFMEMORY; + } + } + else + { + *user_extension_supported = NULL; + } + + *instance_extension_count = vkd3d_check_extensions(vk_extensions, count, NULL, 0, + optional_instance_extensions, ARRAY_SIZE(optional_instance_extensions), + create_info->instance_extensions, create_info->instance_extension_count, + optional_extensions ? optional_extensions->extensions : NULL, + optional_extensions ? optional_extensions->extension_count : 0, + *user_extension_supported, vulkan_info, "instance", + instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); + + vkd3d_free(vk_extensions); + return S_OK; +} + +static HRESULT vkd3d_init_vk_global_procs(struct vkd3d_instance *instance, + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) +{ + HRESULT hr; + + if (!vkGetInstanceProcAddr) + { + if (!(instance->libvulkan = vkd3d_dlopen(SONAME_LIBVULKAN))) + { + ERR("Failed to load libvulkan: %s.\n", vkd3d_dlerror()); + return E_FAIL; + } + + if (!(vkGetInstanceProcAddr = vkd3d_dlsym(instance->libvulkan, "vkGetInstanceProcAddr"))) + { + ERR("Could not load function pointer for vkGetInstanceProcAddr().\n"); + vkd3d_dlclose(instance->libvulkan); + instance->libvulkan = NULL; + return E_FAIL; + } + } + else + { + instance->libvulkan = NULL; + } + + if (FAILED(hr = vkd3d_load_vk_global_procs(&instance->vk_global_procs, vkGetInstanceProcAddr))) + { + if (instance->libvulkan) + vkd3d_dlclose(instance->libvulkan); + instance->libvulkan = NULL; + return hr; + } + + return S_OK; +} + +static VkBool32 VKAPI_PTR vkd3d_debug_report_callback(VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT object_type, uint64_t object, size_t location, + int32_t message_code, const char *layer_prefix, const char *message, 
void *user_data) +{ + FIXME("%s\n", debugstr_a(message)); + return VK_FALSE; +} + +static void vkd3d_init_debug_report(struct vkd3d_instance *instance) +{ + const struct vkd3d_vk_instance_procs *vk_procs = &instance->vk_procs; + VkDebugReportCallbackCreateInfoEXT callback_info; + VkInstance vk_instance = instance->vk_instance; + VkDebugReportCallbackEXT callback; + VkResult vr; + + callback_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; + callback_info.pNext = NULL; + callback_info.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT; + callback_info.pfnCallback = vkd3d_debug_report_callback; + callback_info.pUserData = NULL; + if ((vr = VK_CALL(vkCreateDebugReportCallbackEXT(vk_instance, &callback_info, NULL, &callback))) < 0) + { + WARN("Failed to create debug report callback, vr %d.\n", vr); + return; + } + + instance->vk_debug_callback = callback; +} + +static const struct vkd3d_debug_option vkd3d_config_options[] = +{ + {"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, /* enable Vulkan debug extensions */ +}; + +static uint64_t vkd3d_init_config_flags(void) +{ + uint64_t config_flags; + const char *config; + + config = getenv("VKD3D_CONFIG"); + config_flags = vkd3d_parse_debug_options(config, vkd3d_config_options, ARRAY_SIZE(vkd3d_config_options)); + + if (config_flags) + TRACE("VKD3D_CONFIG='%s'.\n", config); + + return config_flags; +} + +/* TICKSPERSEC from Wine */ +#define VKD3D_DEFAULT_HOST_TICKS_PER_SECOND 10000000 + +static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, + const struct vkd3d_instance_create_info *create_info) +{ + const struct vkd3d_vk_global_procs *vk_global_procs = &instance->vk_global_procs; + const struct vkd3d_optional_instance_extensions_info *optional_extensions; + const struct vkd3d_application_info *vkd3d_application_info; + const struct vkd3d_host_time_domain_info *time_domain_info; + bool *user_extension_supported = NULL; + VkApplicationInfo application_info; + 
VkInstanceCreateInfo instance_info; + char application_name[PATH_MAX]; + uint32_t extension_count; + const char **extensions; + VkInstance vk_instance; + VkResult vr; + HRESULT hr; + + TRACE("Build: " PACKAGE_STRING VKD3D_VCS_ID ".\n"); + + if (!create_info->pfn_signal_event) + { + ERR("Invalid signal event function pointer.\n"); + return E_INVALIDARG; + } + if (!create_info->pfn_create_thread != !create_info->pfn_join_thread) + { + ERR("Invalid create/join thread function pointers.\n"); + return E_INVALIDARG; + } + if (create_info->wchar_size != 2 && create_info->wchar_size != 4) + { + ERR("Unexpected WCHAR size %zu.\n", create_info->wchar_size); + return E_INVALIDARG; + } + + instance->signal_event = create_info->pfn_signal_event; + instance->create_thread = create_info->pfn_create_thread; + instance->join_thread = create_info->pfn_join_thread; + instance->wchar_size = create_info->wchar_size; + + instance->config_flags = vkd3d_init_config_flags(); + + if (FAILED(hr = vkd3d_init_vk_global_procs(instance, create_info->pfn_vkGetInstanceProcAddr))) + { + ERR("Failed to initialize Vulkan global procs, hr %#x.\n", hr); + return hr; + } + + if (FAILED(hr = vkd3d_init_instance_caps(instance, create_info, + &extension_count, &user_extension_supported))) + { + if (instance->libvulkan) + vkd3d_dlclose(instance->libvulkan); + return hr; + } + + application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + application_info.pNext = NULL; + application_info.pApplicationName = NULL; + application_info.applicationVersion = 0; + application_info.pEngineName = PACKAGE_NAME; + application_info.engineVersion = vkd3d_get_vk_version(); + application_info.apiVersion = VK_API_VERSION_1_0; + instance->api_version = VKD3D_API_VERSION_1_0; + + if ((vkd3d_application_info = vkd3d_find_struct(create_info->next, APPLICATION_INFO))) + { + if (vkd3d_application_info->application_name) + application_info.pApplicationName = vkd3d_application_info->application_name; + else if 
(vkd3d_get_program_name(application_name)) + application_info.pApplicationName = application_name; + application_info.applicationVersion = vkd3d_application_info->application_version; + if (vkd3d_application_info->engine_name) + { + application_info.pEngineName = vkd3d_application_info->engine_name; + application_info.engineVersion = vkd3d_application_info->engine_version; + } + instance->api_version = vkd3d_application_info->api_version; + } + else if (vkd3d_get_program_name(application_name)) + { + application_info.pApplicationName = application_name; + } + + TRACE("Application: %s.\n", debugstr_a(application_info.pApplicationName)); + TRACE("vkd3d API version: %u.\n", instance->api_version); + + if (!(extensions = vkd3d_calloc(extension_count, sizeof(*extensions)))) + { + if (instance->libvulkan) + vkd3d_dlclose(instance->libvulkan); + vkd3d_free(user_extension_supported); + return E_OUTOFMEMORY; + } + + optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_INSTANCE_EXTENSIONS_INFO); + + instance_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_info.pNext = NULL; + instance_info.flags = 0; + instance_info.pApplicationInfo = &application_info; + instance_info.enabledLayerCount = 0; + instance_info.ppEnabledLayerNames = NULL; + instance_info.enabledExtensionCount = vkd3d_enable_extensions(extensions, NULL, 0, + optional_instance_extensions, ARRAY_SIZE(optional_instance_extensions), + create_info->instance_extensions, create_info->instance_extension_count, + optional_extensions ? optional_extensions->extensions : NULL, + optional_extensions ? 
optional_extensions->extension_count : 0, + user_extension_supported, &instance->vk_info); + instance_info.ppEnabledExtensionNames = extensions; + vkd3d_free(user_extension_supported); + + vr = vk_global_procs->vkCreateInstance(&instance_info, NULL, &vk_instance); + vkd3d_free(extensions); + if (vr < 0) + { + ERR("Failed to create Vulkan instance, vr %d.\n", vr); + if (instance->libvulkan) + vkd3d_dlclose(instance->libvulkan); + return hresult_from_vk_result(vr); + } + + if (FAILED(hr = vkd3d_load_vk_instance_procs(&instance->vk_procs, vk_global_procs, vk_instance))) + { + ERR("Failed to load instance procs, hr %#x.\n", hr); + if (instance->vk_procs.vkDestroyInstance) + instance->vk_procs.vkDestroyInstance(vk_instance, NULL); + if (instance->libvulkan) + vkd3d_dlclose(instance->libvulkan); + return hr; + } + + if ((time_domain_info = vkd3d_find_struct(create_info->next, HOST_TIME_DOMAIN_INFO))) + instance->host_ticks_per_second = time_domain_info->ticks_per_second; + else + instance->host_ticks_per_second = VKD3D_DEFAULT_HOST_TICKS_PER_SECOND; + + instance->vk_instance = vk_instance; + + TRACE("Created Vulkan instance %p.\n", vk_instance); + + instance->refcount = 1; + + instance->vk_debug_callback = VK_NULL_HANDLE; + if (instance->vk_info.EXT_debug_report) + vkd3d_init_debug_report(instance); + + return S_OK; +} + +HRESULT vkd3d_create_instance(const struct vkd3d_instance_create_info *create_info, + struct vkd3d_instance **instance) +{ + struct vkd3d_instance *object; + HRESULT hr; + + TRACE("create_info %p, instance %p.\n", create_info, instance); + + if (!create_info || !instance) + return E_INVALIDARG; + if (create_info->type != VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO) + { + WARN("Invalid structure type %#x.\n", create_info->type); + return E_INVALIDARG; + } + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = vkd3d_instance_init(object, create_info))) + { + vkd3d_free(object); + return hr; + } + + TRACE("Created 
instance %p.\n", object); + + *instance = object; + + return S_OK; +} + +static void vkd3d_destroy_instance(struct vkd3d_instance *instance) +{ + const struct vkd3d_vk_instance_procs *vk_procs = &instance->vk_procs; + VkInstance vk_instance = instance->vk_instance; + + if (instance->vk_debug_callback) + VK_CALL(vkDestroyDebugReportCallbackEXT(vk_instance, instance->vk_debug_callback, NULL)); + + VK_CALL(vkDestroyInstance(vk_instance, NULL)); + + if (instance->libvulkan) + vkd3d_dlclose(instance->libvulkan); + + vkd3d_free(instance); +} + +ULONG vkd3d_instance_incref(struct vkd3d_instance *instance) +{ + ULONG refcount = InterlockedIncrement(&instance->refcount); + + TRACE("%p increasing refcount to %u.\n", instance, refcount); + + return refcount; +} + +ULONG vkd3d_instance_decref(struct vkd3d_instance *instance) +{ + ULONG refcount = InterlockedDecrement(&instance->refcount); + + TRACE("%p decreasing refcount to %u.\n", instance, refcount); + + if (!refcount) + vkd3d_destroy_instance(instance); + + return refcount; +} + +VkInstance vkd3d_instance_get_vk_instance(struct vkd3d_instance *instance) +{ + return instance->vk_instance; +} + +struct vkd3d_physical_device_info +{ + /* properties */ + VkPhysicalDeviceDescriptorIndexingPropertiesEXT descriptor_indexing_properties; + VkPhysicalDeviceMaintenance3Properties maintenance3_properties; + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; + VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; + VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties; + + VkPhysicalDeviceProperties2KHR properties2; + + /* features */ + VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_features; + VkPhysicalDeviceDepthClipEnableFeaturesEXT depth_clip_features; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features; + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features; + 
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; + VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_features; + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features; + + VkPhysicalDeviceFeatures2 features2; +}; + +static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *info, struct d3d12_device *device) +{ + const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; + VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; + VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties; + VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; + VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; + VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features; + VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; + VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; + VkPhysicalDevice physical_device = device->vk_physical_device; + VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; + struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; + + memset(info, 0, sizeof(*info)); + conditional_rendering_features = &info->conditional_rendering_features; + depth_clip_features = &info->depth_clip_features; + descriptor_indexing_features = &info->descriptor_indexing_features; + descriptor_indexing_properties = &info->descriptor_indexing_properties; + maintenance3_properties = &info->maintenance3_properties; + demote_features = &info->demote_features; + buffer_alignment_features = &info->texel_buffer_alignment_features; + 
buffer_alignment_properties = &info->texel_buffer_alignment_properties; + vertex_divisor_features = &info->vertex_divisor_features; + vertex_divisor_properties = &info->vertex_divisor_properties; + xfb_features = &info->xfb_features; + xfb_properties = &info->xfb_properties; + + info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + + conditional_rendering_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; + vk_prepend_struct(&info->features2, conditional_rendering_features); + depth_clip_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; + vk_prepend_struct(&info->features2, depth_clip_features); + descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; + vk_prepend_struct(&info->features2, descriptor_indexing_features); + demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + vk_prepend_struct(&info->features2, demote_features); + buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; + vk_prepend_struct(&info->features2, buffer_alignment_features); + xfb_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; + vk_prepend_struct(&info->features2, xfb_features); + vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; + vk_prepend_struct(&info->features2, vertex_divisor_features); + + if (vulkan_info->KHR_get_physical_device_properties2) + VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); + else + VK_CALL(vkGetPhysicalDeviceFeatures(physical_device, &info->features2.features)); + + info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + + maintenance3_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; + vk_prepend_struct(&info->properties2, maintenance3_properties); + 
descriptor_indexing_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; + vk_prepend_struct(&info->properties2, descriptor_indexing_properties); + buffer_alignment_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; + vk_prepend_struct(&info->properties2, buffer_alignment_properties); + xfb_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; + vk_prepend_struct(&info->properties2, xfb_properties); + vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; + vk_prepend_struct(&info->properties2, vertex_divisor_properties); + + if (vulkan_info->KHR_get_physical_device_properties2) + VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); + else + VK_CALL(vkGetPhysicalDeviceProperties(physical_device, &info->properties2.properties)); +} + +static void vkd3d_trace_physical_device_properties(const VkPhysicalDeviceProperties *properties) +{ + const uint32_t driver_version = properties->driverVersion; + const uint32_t api_version = properties->apiVersion; + + TRACE("Device name: %s.\n", properties->deviceName); + TRACE("Vendor ID: %#x, Device ID: %#x.\n", properties->vendorID, properties->deviceID); + TRACE("Driver version: %#x (%u.%u.%u, %u.%u.%u.%u).\n", driver_version, + VK_VERSION_MAJOR(driver_version), VK_VERSION_MINOR(driver_version), VK_VERSION_PATCH(driver_version), + driver_version >> 22, (driver_version >> 14) & 0xff, (driver_version >> 6) & 0xff, driver_version & 0x3f); + TRACE("API version: %u.%u.%u.\n", + VK_VERSION_MAJOR(api_version), VK_VERSION_MINOR(api_version), VK_VERSION_PATCH(api_version)); +} + +static void vkd3d_trace_physical_device(VkPhysicalDevice device, + const struct vkd3d_physical_device_info *info, + const struct vkd3d_vk_instance_procs *vk_procs) +{ + VkPhysicalDeviceMemoryProperties memory_properties; + VkQueueFamilyProperties *queue_properties; + 
unsigned int i, j; + uint32_t count; + + vkd3d_trace_physical_device_properties(&info->properties2.properties); + + VK_CALL(vkGetPhysicalDeviceQueueFamilyProperties(device, &count, NULL)); + TRACE("Queue families [%u]:\n", count); + + if (!(queue_properties = vkd3d_calloc(count, sizeof(VkQueueFamilyProperties)))) + return; + VK_CALL(vkGetPhysicalDeviceQueueFamilyProperties(device, &count, queue_properties)); + + for (i = 0; i < count; ++i) + { + TRACE(" Queue family [%u]: flags %s, count %u, timestamp bits %u, image transfer granularity %s.\n", + i, debug_vk_queue_flags(queue_properties[i].queueFlags), + queue_properties[i].queueCount, queue_properties[i].timestampValidBits, + debug_vk_extent_3d(queue_properties[i].minImageTransferGranularity)); + } + vkd3d_free(queue_properties); + + VK_CALL(vkGetPhysicalDeviceMemoryProperties(device, &memory_properties)); + for (i = 0; i < memory_properties.memoryHeapCount; ++i) + { + const VkMemoryHeap *heap = &memory_properties.memoryHeaps[i]; + TRACE("Memory heap [%u]: size %#"PRIx64" (%"PRIu64" MiB), flags %s, memory types:\n", + i, heap->size, heap->size / 1024 / 1024, debug_vk_memory_heap_flags(heap->flags)); + for (j = 0; j < memory_properties.memoryTypeCount; ++j) + { + const VkMemoryType *type = &memory_properties.memoryTypes[j]; + if (type->heapIndex != i) + continue; + TRACE(" Memory type [%u]: flags %s.\n", j, debug_vk_memory_property_flags(type->propertyFlags)); + } + } +} + +static void vkd3d_trace_physical_device_limits(const struct vkd3d_physical_device_info *info) +{ + const VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *divisor_properties; + const VkPhysicalDeviceLimits *limits = &info->properties2.properties.limits; + const VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing; + const VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment; + const VkPhysicalDeviceMaintenance3Properties *maintenance3; + const VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb; + + 
TRACE("Device limits:\n"); + TRACE(" maxImageDimension1D: %u.\n", limits->maxImageDimension1D); + TRACE(" maxImageDimension2D: %u.\n", limits->maxImageDimension2D); + TRACE(" maxImageDimension3D: %u.\n", limits->maxImageDimension3D); + TRACE(" maxImageDimensionCube: %u.\n", limits->maxImageDimensionCube); + TRACE(" maxImageArrayLayers: %u.\n", limits->maxImageArrayLayers); + TRACE(" maxTexelBufferElements: %u.\n", limits->maxTexelBufferElements); + TRACE(" maxUniformBufferRange: %u.\n", limits->maxUniformBufferRange); + TRACE(" maxStorageBufferRange: %u.\n", limits->maxStorageBufferRange); + TRACE(" maxPushConstantsSize: %u.\n", limits->maxPushConstantsSize); + TRACE(" maxMemoryAllocationCount: %u.\n", limits->maxMemoryAllocationCount); + TRACE(" maxSamplerAllocationCount: %u.\n", limits->maxSamplerAllocationCount); + TRACE(" bufferImageGranularity: %#"PRIx64".\n", limits->bufferImageGranularity); + TRACE(" sparseAddressSpaceSize: %#"PRIx64".\n", limits->sparseAddressSpaceSize); + TRACE(" maxBoundDescriptorSets: %u.\n", limits->maxBoundDescriptorSets); + TRACE(" maxPerStageDescriptorSamplers: %u.\n", limits->maxPerStageDescriptorSamplers); + TRACE(" maxPerStageDescriptorUniformBuffers: %u.\n", limits->maxPerStageDescriptorUniformBuffers); + TRACE(" maxPerStageDescriptorStorageBuffers: %u.\n", limits->maxPerStageDescriptorStorageBuffers); + TRACE(" maxPerStageDescriptorSampledImages: %u.\n", limits->maxPerStageDescriptorSampledImages); + TRACE(" maxPerStageDescriptorStorageImages: %u.\n", limits->maxPerStageDescriptorStorageImages); + TRACE(" maxPerStageDescriptorInputAttachments: %u.\n", limits->maxPerStageDescriptorInputAttachments); + TRACE(" maxPerStageResources: %u.\n", limits->maxPerStageResources); + TRACE(" maxDescriptorSetSamplers: %u.\n", limits->maxDescriptorSetSamplers); + TRACE(" maxDescriptorSetUniformBuffers: %u.\n", limits->maxDescriptorSetUniformBuffers); + TRACE(" maxDescriptorSetUniformBuffersDynamic: %u.\n", 
limits->maxDescriptorSetUniformBuffersDynamic); + TRACE(" maxDescriptorSetStorageBuffers: %u.\n", limits->maxDescriptorSetStorageBuffers); + TRACE(" maxDescriptorSetStorageBuffersDynamic: %u.\n", limits->maxDescriptorSetStorageBuffersDynamic); + TRACE(" maxDescriptorSetSampledImages: %u.\n", limits->maxDescriptorSetSampledImages); + TRACE(" maxDescriptorSetStorageImages: %u.\n", limits->maxDescriptorSetStorageImages); + TRACE(" maxDescriptorSetInputAttachments: %u.\n", limits->maxDescriptorSetInputAttachments); + TRACE(" maxVertexInputAttributes: %u.\n", limits->maxVertexInputAttributes); + TRACE(" maxVertexInputBindings: %u.\n", limits->maxVertexInputBindings); + TRACE(" maxVertexInputAttributeOffset: %u.\n", limits->maxVertexInputAttributeOffset); + TRACE(" maxVertexInputBindingStride: %u.\n", limits->maxVertexInputBindingStride); + TRACE(" maxVertexOutputComponents: %u.\n", limits->maxVertexOutputComponents); + TRACE(" maxTessellationGenerationLevel: %u.\n", limits->maxTessellationGenerationLevel); + TRACE(" maxTessellationPatchSize: %u.\n", limits->maxTessellationPatchSize); + TRACE(" maxTessellationControlPerVertexInputComponents: %u.\n", + limits->maxTessellationControlPerVertexInputComponents); + TRACE(" maxTessellationControlPerVertexOutputComponents: %u.\n", + limits->maxTessellationControlPerVertexOutputComponents); + TRACE(" maxTessellationControlPerPatchOutputComponents: %u.\n", + limits->maxTessellationControlPerPatchOutputComponents); + TRACE(" maxTessellationControlTotalOutputComponents: %u.\n", + limits->maxTessellationControlTotalOutputComponents); + TRACE(" maxTessellationEvaluationInputComponents: %u.\n", + limits->maxTessellationEvaluationInputComponents); + TRACE(" maxTessellationEvaluationOutputComponents: %u.\n", + limits->maxTessellationEvaluationOutputComponents); + TRACE(" maxGeometryShaderInvocations: %u.\n", limits->maxGeometryShaderInvocations); + TRACE(" maxGeometryInputComponents: %u.\n", limits->maxGeometryInputComponents); + TRACE(" 
maxGeometryOutputComponents: %u.\n", limits->maxGeometryOutputComponents); + TRACE(" maxGeometryOutputVertices: %u.\n", limits->maxGeometryOutputVertices); + TRACE(" maxGeometryTotalOutputComponents: %u.\n", limits->maxGeometryTotalOutputComponents); + TRACE(" maxFragmentInputComponents: %u.\n", limits->maxFragmentInputComponents); + TRACE(" maxFragmentOutputAttachments: %u.\n", limits->maxFragmentOutputAttachments); + TRACE(" maxFragmentDualSrcAttachments: %u.\n", limits->maxFragmentDualSrcAttachments); + TRACE(" maxFragmentCombinedOutputResources: %u.\n", limits->maxFragmentCombinedOutputResources); + TRACE(" maxComputeSharedMemorySize: %u.\n", limits->maxComputeSharedMemorySize); + TRACE(" maxComputeWorkGroupCount: %u, %u, %u.\n", limits->maxComputeWorkGroupCount[0], + limits->maxComputeWorkGroupCount[1], limits->maxComputeWorkGroupCount[2]); + TRACE(" maxComputeWorkGroupInvocations: %u.\n", limits->maxComputeWorkGroupInvocations); + TRACE(" maxComputeWorkGroupSize: %u, %u, %u.\n", limits->maxComputeWorkGroupSize[0], + limits->maxComputeWorkGroupSize[1], limits->maxComputeWorkGroupSize[2]); + TRACE(" subPixelPrecisionBits: %u.\n", limits->subPixelPrecisionBits); + TRACE(" subTexelPrecisionBits: %u.\n", limits->subTexelPrecisionBits); + TRACE(" mipmapPrecisionBits: %u.\n", limits->mipmapPrecisionBits); + TRACE(" maxDrawIndexedIndexValue: %u.\n", limits->maxDrawIndexedIndexValue); + TRACE(" maxDrawIndirectCount: %u.\n", limits->maxDrawIndirectCount); + TRACE(" maxSamplerLodBias: %f.\n", limits->maxSamplerLodBias); + TRACE(" maxSamplerAnisotropy: %f.\n", limits->maxSamplerAnisotropy); + TRACE(" maxViewports: %u.\n", limits->maxViewports); + TRACE(" maxViewportDimensions: %u, %u.\n", limits->maxViewportDimensions[0], + limits->maxViewportDimensions[1]); + TRACE(" viewportBoundsRange: %f, %f.\n", limits->viewportBoundsRange[0], limits->viewportBoundsRange[1]); + TRACE(" viewportSubPixelBits: %u.\n", limits->viewportSubPixelBits); + TRACE(" minMemoryMapAlignment: 
%u.\n", (unsigned int)limits->minMemoryMapAlignment); + TRACE(" minTexelBufferOffsetAlignment: %#"PRIx64".\n", limits->minTexelBufferOffsetAlignment); + TRACE(" minUniformBufferOffsetAlignment: %#"PRIx64".\n", limits->minUniformBufferOffsetAlignment); + TRACE(" minStorageBufferOffsetAlignment: %#"PRIx64".\n", limits->minStorageBufferOffsetAlignment); + TRACE(" minTexelOffset: %d.\n", limits->minTexelOffset); + TRACE(" maxTexelOffset: %u.\n", limits->maxTexelOffset); + TRACE(" minTexelGatherOffset: %d.\n", limits->minTexelGatherOffset); + TRACE(" maxTexelGatherOffset: %u.\n", limits->maxTexelGatherOffset); + TRACE(" minInterpolationOffset: %f.\n", limits->minInterpolationOffset); + TRACE(" maxInterpolationOffset: %f.\n", limits->maxInterpolationOffset); + TRACE(" subPixelInterpolationOffsetBits: %u.\n", limits->subPixelInterpolationOffsetBits); + TRACE(" maxFramebufferWidth: %u.\n", limits->maxFramebufferWidth); + TRACE(" maxFramebufferHeight: %u.\n", limits->maxFramebufferHeight); + TRACE(" maxFramebufferLayers: %u.\n", limits->maxFramebufferLayers); + TRACE(" framebufferColorSampleCounts: %#x.\n", limits->framebufferColorSampleCounts); + TRACE(" framebufferDepthSampleCounts: %#x.\n", limits->framebufferDepthSampleCounts); + TRACE(" framebufferStencilSampleCounts: %#x.\n", limits->framebufferStencilSampleCounts); + TRACE(" framebufferNoAttachmentsSampleCounts: %#x.\n", limits->framebufferNoAttachmentsSampleCounts); + TRACE(" maxColorAttachments: %u.\n", limits->maxColorAttachments); + TRACE(" sampledImageColorSampleCounts: %#x.\n", limits->sampledImageColorSampleCounts); + TRACE(" sampledImageIntegerSampleCounts: %#x.\n", limits->sampledImageIntegerSampleCounts); + TRACE(" sampledImageDepthSampleCounts: %#x.\n", limits->sampledImageDepthSampleCounts); + TRACE(" sampledImageStencilSampleCounts: %#x.\n", limits->sampledImageStencilSampleCounts); + TRACE(" storageImageSampleCounts: %#x.\n", limits->storageImageSampleCounts); + TRACE(" maxSampleMaskWords: %u.\n", 
limits->maxSampleMaskWords); + TRACE(" timestampComputeAndGraphics: %#x.\n", limits->timestampComputeAndGraphics); + TRACE(" timestampPeriod: %f.\n", limits->timestampPeriod); + TRACE(" maxClipDistances: %u.\n", limits->maxClipDistances); + TRACE(" maxCullDistances: %u.\n", limits->maxCullDistances); + TRACE(" maxCombinedClipAndCullDistances: %u.\n", limits->maxCombinedClipAndCullDistances); + TRACE(" discreteQueuePriorities: %u.\n", limits->discreteQueuePriorities); + TRACE(" pointSizeRange: %f, %f.\n", limits->pointSizeRange[0], limits->pointSizeRange[1]); + TRACE(" lineWidthRange: %f, %f,\n", limits->lineWidthRange[0], limits->lineWidthRange[1]); + TRACE(" pointSizeGranularity: %f.\n", limits->pointSizeGranularity); + TRACE(" lineWidthGranularity: %f.\n", limits->lineWidthGranularity); + TRACE(" strictLines: %#x.\n", limits->strictLines); + TRACE(" standardSampleLocations: %#x.\n", limits->standardSampleLocations); + TRACE(" optimalBufferCopyOffsetAlignment: %#"PRIx64".\n", limits->optimalBufferCopyOffsetAlignment); + TRACE(" optimalBufferCopyRowPitchAlignment: %#"PRIx64".\n", limits->optimalBufferCopyRowPitchAlignment); + TRACE(" nonCoherentAtomSize: %#"PRIx64".\n", limits->nonCoherentAtomSize); + + descriptor_indexing = &info->descriptor_indexing_properties; + TRACE(" VkPhysicalDeviceDescriptorIndexingPropertiesEXT:\n"); + + TRACE(" maxUpdateAfterBindDescriptorsInAllPools: %u.\n", + descriptor_indexing->maxUpdateAfterBindDescriptorsInAllPools); + + TRACE(" shaderUniformBufferArrayNonUniformIndexingNative: %#x.\n", + descriptor_indexing->shaderUniformBufferArrayNonUniformIndexingNative); + TRACE(" shaderSampledImageArrayNonUniformIndexingNative: %#x.\n", + descriptor_indexing->shaderSampledImageArrayNonUniformIndexingNative); + TRACE(" shaderStorageBufferArrayNonUniformIndexingNative: %#x.\n", + descriptor_indexing->shaderStorageBufferArrayNonUniformIndexingNative); + TRACE(" shaderStorageImageArrayNonUniformIndexingNative: %#x.\n", + 
descriptor_indexing->shaderStorageImageArrayNonUniformIndexingNative); + TRACE(" shaderInputAttachmentArrayNonUniformIndexingNative: %#x.\n", + descriptor_indexing->shaderInputAttachmentArrayNonUniformIndexingNative); + + TRACE(" robustBufferAccessUpdateAfterBind: %#x.\n", + descriptor_indexing->robustBufferAccessUpdateAfterBind); + TRACE(" quadDivergentImplicitLod: %#x.\n", + descriptor_indexing->quadDivergentImplicitLod); + + TRACE(" maxPerStageDescriptorUpdateAfterBindSamplers: %u.\n", + descriptor_indexing->maxPerStageDescriptorUpdateAfterBindSamplers); + TRACE(" maxPerStageDescriptorUpdateAfterBindUniformBuffers: %u.\n", + descriptor_indexing->maxPerStageDescriptorUpdateAfterBindUniformBuffers); + TRACE(" maxPerStageDescriptorUpdateAfterBindStorageBuffers: %u.\n", + descriptor_indexing->maxPerStageDescriptorUpdateAfterBindStorageBuffers); + TRACE(" maxPerStageDescriptorUpdateAfterBindSampledImages: %u.\n", + descriptor_indexing->maxPerStageDescriptorUpdateAfterBindSampledImages); + TRACE(" maxPerStageDescriptorUpdateAfterBindStorageImages: %u.\n", + descriptor_indexing->maxPerStageDescriptorUpdateAfterBindStorageImages); + TRACE(" maxPerStageDescriptorUpdateAfterBindInputAttachments: %u.\n", + descriptor_indexing->maxPerStageDescriptorUpdateAfterBindInputAttachments); + TRACE(" maxPerStageUpdateAfterBindResources: %u.\n", + descriptor_indexing->maxPerStageUpdateAfterBindResources); + + TRACE(" maxDescriptorSetUpdateAfterBindSamplers: %u.\n", + descriptor_indexing->maxDescriptorSetUpdateAfterBindSamplers); + TRACE(" maxDescriptorSetUpdateAfterBindUniformBuffers: %u.\n", + descriptor_indexing->maxDescriptorSetUpdateAfterBindUniformBuffers); + TRACE(" maxDescriptorSetUpdateAfterBindUniformBuffersDynamic: %u.\n", + descriptor_indexing->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic); + TRACE(" maxDescriptorSetUpdateAfterBindStorageBuffers: %u.\n", + descriptor_indexing->maxDescriptorSetUpdateAfterBindStorageBuffers); + TRACE(" 
maxDescriptorSetUpdateAfterBindStorageBuffersDynamic: %u.\n", + descriptor_indexing->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic); + TRACE(" maxDescriptorSetUpdateAfterBindSampledImages: %u.\n", + descriptor_indexing->maxDescriptorSetUpdateAfterBindSampledImages); + TRACE(" maxDescriptorSetUpdateAfterBindStorageImages: %u.\n", + descriptor_indexing->maxDescriptorSetUpdateAfterBindStorageImages); + TRACE(" maxDescriptorSetUpdateAfterBindInputAttachments: %u.\n", + descriptor_indexing->maxDescriptorSetUpdateAfterBindInputAttachments); + + maintenance3 = &info->maintenance3_properties; + TRACE(" VkPhysicalDeviceMaintenance3Properties:\n"); + TRACE(" maxPerSetDescriptors: %u.\n", maintenance3->maxPerSetDescriptors); + TRACE(" maxMemoryAllocationSize: %#"PRIx64".\n", maintenance3->maxMemoryAllocationSize); + + buffer_alignment = &info->texel_buffer_alignment_properties; + TRACE(" VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT:\n"); + TRACE(" storageTexelBufferOffsetAlignmentBytes: %#"PRIx64".\n", + buffer_alignment->storageTexelBufferOffsetAlignmentBytes); + TRACE(" storageTexelBufferOffsetSingleTexelAlignment: %#x.\n", + buffer_alignment->storageTexelBufferOffsetSingleTexelAlignment); + TRACE(" uniformTexelBufferOffsetAlignmentBytes: %#"PRIx64".\n", + buffer_alignment->uniformTexelBufferOffsetAlignmentBytes); + TRACE(" uniformTexelBufferOffsetSingleTexelAlignment: %#x.\n", + buffer_alignment->uniformTexelBufferOffsetSingleTexelAlignment); + + xfb = &info->xfb_properties; + TRACE(" VkPhysicalDeviceTransformFeedbackPropertiesEXT:\n"); + TRACE(" maxTransformFeedbackStreams: %u.\n", xfb->maxTransformFeedbackStreams); + TRACE(" maxTransformFeedbackBuffers: %u.\n", xfb->maxTransformFeedbackBuffers); + TRACE(" maxTransformFeedbackBufferSize: %#"PRIx64".\n", xfb->maxTransformFeedbackBufferSize); + TRACE(" maxTransformFeedbackStreamDataSize: %u.\n", xfb->maxTransformFeedbackStreamDataSize); + TRACE(" maxTransformFeedbackBufferDataSize: %u.\n", 
xfb->maxTransformFeedbackBufferDataSize); + TRACE(" maxTransformFeedbackBufferDataStride: %u.\n", xfb->maxTransformFeedbackBufferDataStride); + TRACE(" transformFeedbackQueries: %#x.\n", xfb->transformFeedbackQueries); + TRACE(" transformFeedbackStreamsLinesTriangles: %#x.\n", xfb->transformFeedbackStreamsLinesTriangles); + TRACE(" transformFeedbackRasterizationStreamSelect: %#x.\n", xfb->transformFeedbackRasterizationStreamSelect); + TRACE(" transformFeedbackDraw: %x.\n", xfb->transformFeedbackDraw); + + divisor_properties = &info->vertex_divisor_properties; + TRACE(" VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT:\n"); + TRACE(" maxVertexAttribDivisor: %u.\n", divisor_properties->maxVertexAttribDivisor); +} + +static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_device_info *info) +{ + const VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; + const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; + const VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; + const VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *divisor_features; + const VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; + const VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features; + const VkPhysicalDeviceFeatures *features = &info->features2.features; + const VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb; + + TRACE("Device features:\n"); + TRACE(" robustBufferAccess: %#x.\n", features->robustBufferAccess); + TRACE(" fullDrawIndexUint32: %#x.\n", features->fullDrawIndexUint32); + TRACE(" imageCubeArray: %#x.\n", features->imageCubeArray); + TRACE(" independentBlend: %#x.\n", features->independentBlend); + TRACE(" geometryShader: %#x.\n", features->geometryShader); + TRACE(" tessellationShader: %#x.\n", features->tessellationShader); + TRACE(" sampleRateShading: %#x.\n", features->sampleRateShading); + TRACE(" dualSrcBlend: %#x.\n", 
features->dualSrcBlend); + TRACE(" logicOp: %#x.\n", features->logicOp); + TRACE(" multiDrawIndirect: %#x.\n", features->multiDrawIndirect); + TRACE(" drawIndirectFirstInstance: %#x.\n", features->drawIndirectFirstInstance); + TRACE(" depthClamp: %#x.\n", features->depthClamp); + TRACE(" depthBiasClamp: %#x.\n", features->depthBiasClamp); + TRACE(" fillModeNonSolid: %#x.\n", features->fillModeNonSolid); + TRACE(" depthBounds: %#x.\n", features->depthBounds); + TRACE(" wideLines: %#x.\n", features->wideLines); + TRACE(" largePoints: %#x.\n", features->largePoints); + TRACE(" alphaToOne: %#x.\n", features->alphaToOne); + TRACE(" multiViewport: %#x.\n", features->multiViewport); + TRACE(" samplerAnisotropy: %#x.\n", features->samplerAnisotropy); + TRACE(" textureCompressionETC2: %#x.\n", features->textureCompressionETC2); + TRACE(" textureCompressionASTC_LDR: %#x.\n", features->textureCompressionASTC_LDR); + TRACE(" textureCompressionBC: %#x.\n", features->textureCompressionBC); + TRACE(" occlusionQueryPrecise: %#x.\n", features->occlusionQueryPrecise); + TRACE(" pipelineStatisticsQuery: %#x.\n", features->pipelineStatisticsQuery); + TRACE(" vertexOipelineStoresAndAtomics: %#x.\n", features->vertexPipelineStoresAndAtomics); + TRACE(" fragmentStoresAndAtomics: %#x.\n", features->fragmentStoresAndAtomics); + TRACE(" shaderTessellationAndGeometryPointSize: %#x.\n", features->shaderTessellationAndGeometryPointSize); + TRACE(" shaderImageGatherExtended: %#x.\n", features->shaderImageGatherExtended); + TRACE(" shaderStorageImageExtendedFormats: %#x.\n", features->shaderStorageImageExtendedFormats); + TRACE(" shaderStorageImageMultisample: %#x.\n", features->shaderStorageImageMultisample); + TRACE(" shaderStorageImageReadWithoutFormat: %#x.\n", features->shaderStorageImageReadWithoutFormat); + TRACE(" shaderStorageImageWriteWithoutFormat: %#x.\n", features->shaderStorageImageWriteWithoutFormat); + TRACE(" shaderUniformBufferArrayDynamicIndexing: %#x.\n", 
features->shaderUniformBufferArrayDynamicIndexing); + TRACE(" shaderSampledImageArrayDynamicIndexing: %#x.\n", features->shaderSampledImageArrayDynamicIndexing); + TRACE(" shaderStorageBufferArrayDynamicIndexing: %#x.\n", features->shaderStorageBufferArrayDynamicIndexing); + TRACE(" shaderStorageImageArrayDynamicIndexing: %#x.\n", features->shaderStorageImageArrayDynamicIndexing); + TRACE(" shaderClipDistance: %#x.\n", features->shaderClipDistance); + TRACE(" shaderCullDistance: %#x.\n", features->shaderCullDistance); + TRACE(" shaderFloat64: %#x.\n", features->shaderFloat64); + TRACE(" shaderInt64: %#x.\n", features->shaderInt64); + TRACE(" shaderInt16: %#x.\n", features->shaderInt16); + TRACE(" shaderResourceResidency: %#x.\n", features->shaderResourceResidency); + TRACE(" shaderResourceMinLod: %#x.\n", features->shaderResourceMinLod); + TRACE(" sparseBinding: %#x.\n", features->sparseBinding); + TRACE(" sparseResidencyBuffer: %#x.\n", features->sparseResidencyBuffer); + TRACE(" sparseResidencyImage2D: %#x.\n", features->sparseResidencyImage2D); + TRACE(" sparseResidencyImage3D: %#x.\n", features->sparseResidencyImage3D); + TRACE(" sparseResidency2Samples: %#x.\n", features->sparseResidency2Samples); + TRACE(" sparseResidency4Samples: %#x.\n", features->sparseResidency4Samples); + TRACE(" sparseResidency8Samples: %#x.\n", features->sparseResidency8Samples); + TRACE(" sparseResidency16Samples: %#x.\n", features->sparseResidency16Samples); + TRACE(" sparseResidencyAliased: %#x.\n", features->sparseResidencyAliased); + TRACE(" variableMultisampleRate: %#x.\n", features->variableMultisampleRate); + TRACE(" inheritedQueries: %#x.\n", features->inheritedQueries); + + descriptor_indexing = &info->descriptor_indexing_features; + TRACE(" VkPhysicalDeviceDescriptorIndexingFeaturesEXT:\n"); + + TRACE(" shaderInputAttachmentArrayDynamicIndexing: %#x.\n", + descriptor_indexing->shaderInputAttachmentArrayDynamicIndexing); + TRACE(" shaderUniformTexelBufferArrayDynamicIndexing: 
%#x.\n", + descriptor_indexing->shaderUniformTexelBufferArrayDynamicIndexing); + TRACE(" shaderStorageTexelBufferArrayDynamicIndexing: %#x.\n", + descriptor_indexing->shaderStorageTexelBufferArrayDynamicIndexing); + + TRACE(" shaderUniformBufferArrayNonUniformIndexing: %#x.\n", + descriptor_indexing->shaderUniformBufferArrayNonUniformIndexing); + TRACE(" shaderSampledImageArrayNonUniformIndexing: %#x.\n", + descriptor_indexing->shaderSampledImageArrayNonUniformIndexing); + TRACE(" shaderStorageBufferArrayNonUniformIndexing: %#x.\n", + descriptor_indexing->shaderStorageBufferArrayNonUniformIndexing); + TRACE(" shaderStorageImageArrayNonUniformIndexing: %#x.\n", + descriptor_indexing->shaderStorageImageArrayNonUniformIndexing); + TRACE(" shaderInputAttachmentArrayNonUniformIndexing: %#x.\n", + descriptor_indexing->shaderInputAttachmentArrayNonUniformIndexing); + TRACE(" shaderUniformTexelBufferArrayNonUniformIndexing: %#x.\n", + descriptor_indexing->shaderUniformTexelBufferArrayNonUniformIndexing); + TRACE(" shaderStorageTexelBufferArrayNonUniformIndexing: %#x.\n", + descriptor_indexing->shaderStorageTexelBufferArrayNonUniformIndexing); + + TRACE(" descriptorBindingUniformBufferUpdateAfterBind: %#x.\n", + descriptor_indexing->descriptorBindingUniformBufferUpdateAfterBind); + TRACE(" descriptorBindingSampledImageUpdateAfterBind: %#x.\n", + descriptor_indexing->descriptorBindingSampledImageUpdateAfterBind); + TRACE(" descriptorBindingStorageImageUpdateAfterBind: %#x.\n", + descriptor_indexing->descriptorBindingStorageImageUpdateAfterBind); + TRACE(" descriptorBindingStorageBufferUpdateAfterBind: %#x.\n", + descriptor_indexing->descriptorBindingStorageBufferUpdateAfterBind); + TRACE(" descriptorBindingUniformTexelBufferUpdateAfterBind: %#x.\n", + descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind); + TRACE(" descriptorBindingStorageTexelBufferUpdateAfterBind: %#x.\n", + descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind); + + 
TRACE(" descriptorBindingUpdateUnusedWhilePending: %#x.\n", + descriptor_indexing->descriptorBindingUpdateUnusedWhilePending); + TRACE(" descriptorBindingPartiallyBound: %#x.\n", + descriptor_indexing->descriptorBindingPartiallyBound); + TRACE(" descriptorBindingVariableDescriptorCount: %#x.\n", + descriptor_indexing->descriptorBindingVariableDescriptorCount); + TRACE(" runtimeDescriptorArray: %#x.\n", + descriptor_indexing->runtimeDescriptorArray); + + conditional_rendering_features = &info->conditional_rendering_features; + TRACE(" VkPhysicalDeviceConditionalRenderingFeaturesEXT:\n"); + TRACE(" conditionalRendering: %#x.\n", conditional_rendering_features->conditionalRendering); + + depth_clip_features = &info->depth_clip_features; + TRACE(" VkPhysicalDeviceDepthClipEnableFeaturesEXT:\n"); + TRACE(" depthClipEnable: %#x.\n", depth_clip_features->depthClipEnable); + + demote_features = &info->demote_features; + TRACE(" VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT:\n"); + TRACE(" shaderDemoteToHelperInvocation: %#x.\n", demote_features->shaderDemoteToHelperInvocation); + + buffer_alignment_features = &info->texel_buffer_alignment_features; + TRACE(" VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT:\n"); + TRACE(" texelBufferAlignment: %#x.\n", buffer_alignment_features->texelBufferAlignment); + + xfb = &info->xfb_features; + TRACE(" VkPhysicalDeviceTransformFeedbackFeaturesEXT:\n"); + TRACE(" transformFeedback: %#x.\n", xfb->transformFeedback); + TRACE(" geometryStreams: %#x.\n", xfb->geometryStreams); + + divisor_features = &info->vertex_divisor_features; + TRACE(" VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT:\n"); + TRACE(" vertexAttributeInstanceRateDivisor: %#x.\n", + divisor_features->vertexAttributeInstanceRateDivisor); + TRACE(" vertexAttributeInstanceRateZeroDivisor: %#x.\n", + divisor_features->vertexAttributeInstanceRateZeroDivisor); +} + +static void vkd3d_init_feature_level(struct vkd3d_vulkan_info *vk_info, + const 
VkPhysicalDeviceFeatures *features, + const D3D12_FEATURE_DATA_D3D12_OPTIONS *d3d12_options) +{ + bool have_11_0 = true; + +#define CHECK_MIN_REQUIREMENT(name, value) \ + if (vk_info->device_limits.name < value) \ + WARN(#name " does not meet feature level 11_0 requirements.\n"); +#define CHECK_MAX_REQUIREMENT(name, value) \ + if (vk_info->device_limits.name > value) \ + WARN(#name " does not meet feature level 11_0 requirements.\n"); +#define CHECK_FEATURE(name) \ + if (!features->name) \ + { \ + WARN(#name " is not supported.\n"); \ + have_11_0 = false; \ + } + + if (!vk_info->device_limits.timestampComputeAndGraphics) + WARN("Timestamps are not supported on all graphics and compute queues.\n"); + + CHECK_MIN_REQUIREMENT(maxPushConstantsSize, D3D12_MAX_ROOT_COST * sizeof(uint32_t)); + CHECK_MIN_REQUIREMENT(maxComputeSharedMemorySize, D3D12_CS_TGSM_REGISTER_COUNT * sizeof(uint32_t)); + + CHECK_MAX_REQUIREMENT(viewportBoundsRange[0], D3D12_VIEWPORT_BOUNDS_MIN); + CHECK_MIN_REQUIREMENT(viewportBoundsRange[1], D3D12_VIEWPORT_BOUNDS_MAX); + CHECK_MIN_REQUIREMENT(viewportSubPixelBits, 8); + + CHECK_MIN_REQUIREMENT(maxPerStageDescriptorUniformBuffers, + D3D12_COMMONSHADER_CONSTANT_BUFFER_REGISTER_COUNT); + + CHECK_FEATURE(depthBiasClamp); + CHECK_FEATURE(depthClamp); + CHECK_FEATURE(drawIndirectFirstInstance); + CHECK_FEATURE(dualSrcBlend); + CHECK_FEATURE(fragmentStoresAndAtomics); + CHECK_FEATURE(fullDrawIndexUint32); + CHECK_FEATURE(geometryShader); + CHECK_FEATURE(imageCubeArray); + CHECK_FEATURE(independentBlend); + CHECK_FEATURE(multiDrawIndirect); + CHECK_FEATURE(multiViewport); + CHECK_FEATURE(occlusionQueryPrecise); + CHECK_FEATURE(pipelineStatisticsQuery); + CHECK_FEATURE(samplerAnisotropy); + CHECK_FEATURE(sampleRateShading); + CHECK_FEATURE(shaderClipDistance); + CHECK_FEATURE(shaderCullDistance); + CHECK_FEATURE(shaderImageGatherExtended); + CHECK_FEATURE(shaderStorageImageWriteWithoutFormat); + CHECK_FEATURE(tessellationShader); + + if 
(!vk_info->EXT_depth_clip_enable) + WARN("Depth clip enable is not supported.\n"); + if (!vk_info->EXT_transform_feedback) + WARN("Stream output is not supported.\n"); + + if (!vk_info->EXT_vertex_attribute_divisor) + WARN("Vertex attribute instance rate divisor is not supported.\n"); + else if (!vk_info->vertex_attrib_zero_divisor) + WARN("Vertex attribute instance rate zero divisor is not supported.\n"); + +#undef CHECK_MIN_REQUIREMENT +#undef CHECK_MAX_REQUIREMENT +#undef CHECK_FEATURE + + vk_info->max_feature_level = D3D_FEATURE_LEVEL_11_0; + + if (have_11_0 + && d3d12_options->OutputMergerLogicOp + && features->vertexPipelineStoresAndAtomics + && vk_info->device_limits.maxPerStageDescriptorStorageBuffers >= D3D12_UAV_SLOT_COUNT + && vk_info->device_limits.maxPerStageDescriptorStorageImages >= D3D12_UAV_SLOT_COUNT) + vk_info->max_feature_level = D3D_FEATURE_LEVEL_11_1; + + /* TODO: MinMaxFiltering */ + if (vk_info->max_feature_level >= D3D_FEATURE_LEVEL_11_1 + && d3d12_options->TiledResourcesTier >= D3D12_TILED_RESOURCES_TIER_2 + && d3d12_options->ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_2 + && d3d12_options->TypedUAVLoadAdditionalFormats) + vk_info->max_feature_level = D3D_FEATURE_LEVEL_12_0; + + if (vk_info->max_feature_level >= D3D_FEATURE_LEVEL_12_0 + && d3d12_options->ROVsSupported + && d3d12_options->ConservativeRasterizationTier >= D3D12_CONSERVATIVE_RASTERIZATION_TIER_1) + vk_info->max_feature_level = D3D_FEATURE_LEVEL_12_1; + + TRACE("Max feature level: %#x.\n", vk_info->max_feature_level); +} + +static void vkd3d_device_descriptor_limits_init(struct vkd3d_device_descriptor_limits *limits, + const VkPhysicalDeviceLimits *device_limits) +{ + limits->uniform_buffer_max_descriptors = device_limits->maxDescriptorSetUniformBuffers; + limits->sampled_image_max_descriptors = device_limits->maxDescriptorSetSampledImages; + limits->storage_buffer_max_descriptors = device_limits->maxDescriptorSetStorageBuffers; + limits->storage_image_max_descriptors = 
        device_limits->maxDescriptorSetStorageImages;
    limits->sampler_max_descriptors = min(device_limits->maxDescriptorSetSamplers, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS);
}

/* Translate the physical device's Vulkan features, limits, and extension set
 * into the d3d12 device's D3D12_FEATURE_DATA_D3D12_OPTIONS* caps and the
 * vkd3d_vulkan_info flags. On success, *device_extension_count holds the
 * number of device extensions to enable and *user_extension_supported (heap
 * allocated, owned by the caller) records which optional user extensions are
 * available. Returns E_INVALIDARG when the requested minimum feature level
 * cannot be satisfied. */
static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
        const struct vkd3d_device_create_info *create_info,
        struct vkd3d_physical_device_info *physical_device_info,
        uint32_t *device_extension_count, bool **user_extension_supported)
{
    const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs;
    const struct vkd3d_optional_device_extensions_info *optional_extensions;
    VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing;
    VkPhysicalDevice physical_device = device->vk_physical_device;
    struct vkd3d_vulkan_info *vulkan_info = &device->vk_info;
    VkExtensionProperties *vk_extensions;
    VkPhysicalDeviceFeatures *features;
    uint32_t count;
    VkResult vr;

    *device_extension_count = 0;

    vkd3d_trace_physical_device(physical_device, physical_device_info, vk_procs);
    vkd3d_trace_physical_device_features(physical_device_info);
    vkd3d_trace_physical_device_limits(physical_device_info);

    features = &physical_device_info->features2.features;

    /* Sparse buffer and 2D image residency are only useful together here;
     * disable both (and strict non-resident reads) if either is missing. */
    if (!features->sparseResidencyBuffer || !features->sparseResidencyImage2D)
    {
        features->sparseResidencyBuffer = VK_FALSE;
        features->sparseResidencyImage2D = VK_FALSE;
        physical_device_info->properties2.properties.sparseProperties.residencyNonResidentStrict = VK_FALSE;
    }

    vulkan_info->device_limits = physical_device_info->properties2.properties.limits;
    vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties;
    vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect;
    vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries;
    vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1);

    device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64;
    device->feature_options.OutputMergerLogicOp = features->logicOp;
    /* SPV_KHR_16bit_storage */
    device->feature_options.MinPrecisionSupport = D3D12_SHADER_MIN_PRECISION_SUPPORT_NONE;

    if (!features->sparseBinding)
        device->feature_options.TiledResourcesTier = D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED;
    else if (!device->vk_info.sparse_properties.residencyNonResidentStrict)
        device->feature_options.TiledResourcesTier = D3D12_TILED_RESOURCES_TIER_1;
    else if (!features->sparseResidencyImage3D)
        device->feature_options.TiledResourcesTier = D3D12_TILED_RESOURCES_TIER_2;
    else
        device->feature_options.TiledResourcesTier = D3D12_TILED_RESOURCES_TIER_3;

    /* FIXME: Implement tiled resources. */
    if (device->feature_options.TiledResourcesTier)
    {
        WARN("Tiled resources are not implemented yet.\n");
        device->feature_options.TiledResourcesTier = D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED;
    }

    /* Tier thresholds mirror the d3d12 resource binding tier requirements
     * (16 samplers / 14 CBVs per stage). */
    if (device->vk_info.device_limits.maxPerStageDescriptorSamplers <= 16)
        device->feature_options.ResourceBindingTier = D3D12_RESOURCE_BINDING_TIER_1;
    else if (device->vk_info.device_limits.maxPerStageDescriptorUniformBuffers <= 14)
        device->feature_options.ResourceBindingTier = D3D12_RESOURCE_BINDING_TIER_2;
    else
        device->feature_options.ResourceBindingTier = D3D12_RESOURCE_BINDING_TIER_3;

    device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageExtendedFormats;
    /* GL_INTEL_fragment_shader_ordering, no Vulkan equivalent. */
    device->feature_options.ROVsSupported = FALSE;
    /* GL_INTEL_conservative_rasterization, no Vulkan equivalent. */
    device->feature_options.ConservativeRasterizationTier = D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED;
    device->feature_options.MaxGPUVirtualAddressBitsPerResource = 40; /* FIXME */
    device->feature_options.StandardSwizzle64KBSupported = FALSE;
    device->feature_options.CrossNodeSharingTier = D3D12_CROSS_NODE_SHARING_TIER_NOT_SUPPORTED;
    device->feature_options.CrossAdapterRowMajorTextureSupported = FALSE;
    /* SPV_EXT_shader_viewport_index_layer */
    device->feature_options.VPAndRTArrayIndexFromAnyShaderFeedingRasterizerSupportedWithoutGSEmulation = FALSE;
    device->feature_options.ResourceHeapTier = D3D12_RESOURCE_HEAP_TIER_2;

    /* Shader Model 6 support. */
    device->feature_options1.WaveOps = FALSE;
    device->feature_options1.WaveLaneCountMin = 0;
    device->feature_options1.WaveLaneCountMax = 0;
    device->feature_options1.TotalLaneCount = 0;
    device->feature_options1.ExpandedComputeResourceStates = TRUE;
    device->feature_options1.Int64ShaderOps = features->shaderInt64;

    /* Depth bounds test is enabled in D3D12_DEPTH_STENCIL_DESC1, which is not
     * supported. */
    device->feature_options2.DepthBoundsTestSupported = FALSE;
    /* d3d12_command_list_SetSamplePositions() is not implemented. */
    device->feature_options2.ProgrammableSamplePositionsTier = D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED;

    device->feature_options3.CopyQueueTimestampQueriesSupported = FALSE;
    device->feature_options3.CastingFullyTypedFormatSupported = FALSE;
    device->feature_options3.WriteBufferImmediateSupportFlags = D3D12_COMMAND_LIST_SUPPORT_FLAG_NONE;
    device->feature_options3.ViewInstancingTier = D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED;
    device->feature_options3.BarycentricsSupported = FALSE;

    device->feature_options4.MSAA64KBAlignedTextureSupported = FALSE;
    device->feature_options4.SharedResourceCompatibilityTier = D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER_0;
    /* An SM 6.2 feature. This would require features->shaderInt16 and
     * VK_KHR_shader_float16_int8. */
    device->feature_options4.Native16BitShaderOpsSupported = FALSE;

    device->feature_options5.SRVOnlyTiledResourceTier3 = FALSE;
    device->feature_options5.RenderPassesTier = D3D12_RENDER_PASS_TIER_0;
    device->feature_options5.RaytracingTier = D3D12_RAYTRACING_TIER_NOT_SUPPORTED;

    /* Two-call enumeration: first query the extension count, then fetch the
     * properties into a freshly allocated array. */
    if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, NULL))) < 0)
    {
        ERR("Failed to enumerate device extensions, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }
    if (!count)
        return S_OK;

    if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions))))
        return E_OUTOFMEMORY;

    TRACE("Enumerating %u device extensions.\n", count);
    if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, vk_extensions))) < 0)
    {
        ERR("Failed to enumerate device extensions, vr %d.\n", vr);
        vkd3d_free(vk_extensions);
        return hresult_from_vk_result(vr);
    }

    optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO);
    if (optional_extensions && optional_extensions->extension_count)
    {
        if (!(*user_extension_supported = vkd3d_calloc(optional_extensions->extension_count, sizeof(bool))))
        {
            vkd3d_free(vk_extensions);
            return E_OUTOFMEMORY;
        }
    }
    else
    {
        *user_extension_supported = NULL;
    }

    *device_extension_count = vkd3d_check_extensions(vk_extensions, count,
            required_device_extensions, ARRAY_SIZE(required_device_extensions),
            optional_device_extensions, ARRAY_SIZE(optional_device_extensions),
            create_info->device_extensions, create_info->device_extension_count,
            optional_extensions ? optional_extensions->extensions : NULL,
            optional_extensions ? optional_extensions->extension_count : 0,
            *user_extension_supported, vulkan_info, "device",
            device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG);

    /* Extension presence alone is not enough; the corresponding feature bit
     * must also be reported by the physical device. */
    if (!physical_device_info->conditional_rendering_features.conditionalRendering)
        vulkan_info->EXT_conditional_rendering = false;
    if (!physical_device_info->depth_clip_features.depthClipEnable)
        vulkan_info->EXT_depth_clip_enable = false;
    if (!physical_device_info->demote_features.shaderDemoteToHelperInvocation)
        vulkan_info->EXT_shader_demote_to_helper_invocation = false;
    if (!physical_device_info->texel_buffer_alignment_features.texelBufferAlignment)
        vulkan_info->EXT_texel_buffer_alignment = false;

    vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties;

    /* The zero-divisor feature bit only exists from revision 3 of
     * VK_EXT_vertex_attribute_divisor onwards. */
    if (get_spec_version(vk_extensions, count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3)
    {
        const VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *divisor_features;
        divisor_features = &physical_device_info->vertex_divisor_features;
        if (!divisor_features->vertexAttributeInstanceRateDivisor)
            vulkan_info->EXT_vertex_attribute_divisor = false;
        vulkan_info->vertex_attrib_zero_divisor = divisor_features->vertexAttributeInstanceRateZeroDivisor;
    }
    else
    {
        vulkan_info->vertex_attrib_zero_divisor = false;
    }

    vkd3d_free(vk_extensions);

    device->feature_options.PSSpecifiedStencilRefSupported = vulkan_info->EXT_shader_stencil_export;

    vkd3d_init_feature_level(vulkan_info, features, &device->feature_options);
    if (vulkan_info->max_feature_level < create_info->minimum_feature_level)
    {
        WARN("Feature level %#x is not supported.\n", create_info->minimum_feature_level);
        vkd3d_free(*user_extension_supported);
        *user_extension_supported = NULL;
        return E_INVALIDARG;
    }

    /* Shader extensions. */
    if (vulkan_info->EXT_shader_demote_to_helper_invocation)
    {
        vulkan_info->shader_extension_count = 1;
        vulkan_info->shader_extensions[0] = VKD3D_SHADER_SPIRV_EXTENSION_EXT_DEMOTE_TO_HELPER_INVOCATION;
    }

    if (vulkan_info->EXT_descriptor_indexing)
        vulkan_info->shader_extensions[vulkan_info->shader_extension_count++]
                = VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING;

    if (vulkan_info->EXT_shader_stencil_export)
        vulkan_info->shader_extensions[vulkan_info->shader_extension_count++]
                = VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT;

    /* Disable unused Vulkan features. */
    features->shaderTessellationAndGeometryPointSize = VK_FALSE;

    descriptor_indexing = &physical_device_info->descriptor_indexing_features;
    /* NOTE(review): descriptor_indexing is the address of an embedded struct
     * member and can never be NULL, so this check is always true. */
    if (descriptor_indexing)
    {
        descriptor_indexing->shaderInputAttachmentArrayDynamicIndexing = VK_FALSE;
        descriptor_indexing->shaderInputAttachmentArrayNonUniformIndexing = VK_FALSE;

        /* We do not use storage buffers currently. */
        features->shaderStorageBufferArrayDynamicIndexing = VK_FALSE;
        descriptor_indexing->shaderStorageBufferArrayNonUniformIndexing = VK_FALSE;
        descriptor_indexing->descriptorBindingStorageBufferUpdateAfterBind = VK_FALSE;
    }

    /* Per the Vulkan spec, robustBufferAccess may not be combined with
     * update-after-bind unless robustBufferAccessUpdateAfterBind is set. */
    if (vulkan_info->EXT_descriptor_indexing && descriptor_indexing
            && (descriptor_indexing->descriptorBindingUniformBufferUpdateAfterBind
            || descriptor_indexing->descriptorBindingStorageBufferUpdateAfterBind
            || descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind
            || descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind)
            && !physical_device_info->descriptor_indexing_properties.robustBufferAccessUpdateAfterBind)
    {
        WARN("Disabling robust buffer access for the update after bind feature.\n");
        features->robustBufferAccess = VK_FALSE;
    }

    vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits,
            &physical_device_info->properties2.properties.limits);

    return S_OK;
}

static HRESULT
vkd3d_select_physical_device(struct vkd3d_instance *instance,
        unsigned int device_index, VkPhysicalDevice *selected_device)
{
    VkPhysicalDevice dgpu_device = VK_NULL_HANDLE, igpu_device = VK_NULL_HANDLE;
    const struct vkd3d_vk_instance_procs *vk_procs = &instance->vk_procs;
    VkInstance vk_instance = instance->vk_instance;
    VkPhysicalDeviceProperties device_properties;
    VkPhysicalDevice device = VK_NULL_HANDLE;
    VkPhysicalDevice *physical_devices;
    uint32_t count;
    unsigned int i;
    VkResult vr;

    count = 0;
    if ((vr = VK_CALL(vkEnumeratePhysicalDevices(vk_instance, &count, NULL))) < 0)
    {
        ERR("Failed to enumerate physical devices, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }
    if (!count)
    {
        ERR("No physical device available.\n");
        return E_FAIL;
    }
    if (!(physical_devices = vkd3d_calloc(count, sizeof(*physical_devices))))
        return E_OUTOFMEMORY;

    TRACE("Enumerating %u physical device(s).\n", count);
    if ((vr = VK_CALL(vkEnumeratePhysicalDevices(vk_instance, &count, physical_devices))) < 0)
    {
        ERR("Failed to enumerate physical devices, vr %d.\n", vr);
        vkd3d_free(physical_devices);
        return hresult_from_vk_result(vr);
    }

    /* ~0u means "no explicit index requested"; an out-of-range explicit index
     * only warns and falls through to the preference heuristic below. */
    if (device_index != ~0u && device_index >= count)
        WARN("Device index %u is out of range.\n", device_index);

    for (i = 0; i < count; ++i)
    {
        VK_CALL(vkGetPhysicalDeviceProperties(physical_devices[i], &device_properties));
        vkd3d_trace_physical_device_properties(&device_properties);

        if (i == device_index)
            device = physical_devices[i];

        /* Remember the first discrete and first integrated GPU seen. */
        if (device_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && !dgpu_device)
            dgpu_device = physical_devices[i];
        else if (device_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU && !igpu_device)
            igpu_device = physical_devices[i];
    }

    /* Preference order: explicit index, discrete GPU, integrated GPU, first
     * enumerated device. */
    if (!device)
        device = dgpu_device ? dgpu_device : igpu_device;
    if (!device)
        device = physical_devices[0];

    vkd3d_free(physical_devices);

    VK_CALL(vkGetPhysicalDeviceProperties(device, &device_properties));
    TRACE("Using device: %s, %#x:%#x.\n", device_properties.deviceName,
            device_properties.vendorID, device_properties.deviceID);

    *selected_device = device;

    return S_OK;
}

/* Vulkan queues */
enum vkd3d_queue_family
{
    VKD3D_QUEUE_FAMILY_DIRECT,
    VKD3D_QUEUE_FAMILY_COMPUTE,
    VKD3D_QUEUE_FAMILY_TRANSFER,

    VKD3D_QUEUE_FAMILY_COUNT,
};

/* Per-family queue selection computed by vkd3d_select_queues() and consumed
 * when creating the Vulkan device and the vkd3d queue wrappers. */
struct vkd3d_device_queue_info
{
    unsigned int family_index[VKD3D_QUEUE_FAMILY_COUNT];
    VkQueueFamilyProperties vk_properties[VKD3D_QUEUE_FAMILY_COUNT];

    unsigned int vk_family_count;
    VkDeviceQueueCreateInfo vk_queue_create_info[VKD3D_QUEUE_FAMILY_COUNT];
};

/* Destroy the device's queue wrappers, destroying each shared wrapper only
 * once (compute/copy may alias the direct queue). */
static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device)
{
    if (device->direct_queue)
        vkd3d_queue_destroy(device->direct_queue, device);
    if (device->compute_queue && device->compute_queue != device->direct_queue)
        vkd3d_queue_destroy(device->compute_queue, device);
    if (device->copy_queue && device->copy_queue != device->direct_queue
            && device->copy_queue != device->compute_queue)
        vkd3d_queue_destroy(device->copy_queue, device);

    device->direct_queue = NULL;
    device->compute_queue = NULL;
    device->copy_queue = NULL;
}

/* Create the direct/compute/copy queue wrappers from the selected families,
 * sharing a wrapper whenever two families coincide. On failure all queues
 * created so far are destroyed. */
static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device,
        const struct vkd3d_device_queue_info *queue_info)
{
    uint32_t transfer_family_index = queue_info->family_index[VKD3D_QUEUE_FAMILY_TRANSFER];
    uint32_t compute_family_index = queue_info->family_index[VKD3D_QUEUE_FAMILY_COMPUTE];
    uint32_t direct_family_index = queue_info->family_index[VKD3D_QUEUE_FAMILY_DIRECT];
    HRESULT hr;

    device->direct_queue = NULL;
    device->compute_queue = NULL;
    device->copy_queue = NULL;

    device->queue_family_count = 0;
    memset(device->queue_family_indices, 0, sizeof(device->queue_family_indices));

    if
    (SUCCEEDED((hr = vkd3d_queue_create(device, direct_family_index,
            &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT], &device->direct_queue))))
        device->queue_family_indices[device->queue_family_count++] = direct_family_index;
    else
        goto out_destroy_queues;

    if (compute_family_index == direct_family_index)
        device->compute_queue = device->direct_queue;
    else if (SUCCEEDED(hr = vkd3d_queue_create(device, compute_family_index,
            &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_COMPUTE], &device->compute_queue)))
        device->queue_family_indices[device->queue_family_count++] = compute_family_index;
    else
        goto out_destroy_queues;

    if (transfer_family_index == direct_family_index)
        device->copy_queue = device->direct_queue;
    else if (transfer_family_index == compute_family_index)
        device->copy_queue = device->compute_queue;
    else if (SUCCEEDED(hr = vkd3d_queue_create(device, transfer_family_index,
            &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER], &device->copy_queue)))
        device->queue_family_indices[device->queue_family_count++] = transfer_family_index;
    else
        goto out_destroy_queues;

    /* Copy-queue timestamps depend on the copy queue actually supporting
     * timestamp queries; this refines the value set in
     * vkd3d_init_device_caps(). */
    device->feature_options3.CopyQueueTimestampQueriesSupported = !!device->copy_queue->timestamp_bits;

    return S_OK;

out_destroy_queues:
    d3d12_device_destroy_vkd3d_queues(device);
    return hr;
}

static float queue_priorities[] = {1.0f};

/* Pick one Vulkan queue family for each of the direct, compute, and transfer
 * d3d12 queue types, filling *info with the family indices, properties and
 * VkDeviceQueueCreateInfo entries. Missing compute/transfer families fall
 * back to the direct family; a missing direct family is fatal (E_FAIL). */
static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance,
        VkPhysicalDevice physical_device, struct vkd3d_device_queue_info *info)
{
    const struct vkd3d_vk_instance_procs *vk_procs = &vkd3d_instance->vk_procs;
    VkQueueFamilyProperties *queue_properties = NULL;
    VkDeviceQueueCreateInfo *queue_info = NULL;
    unsigned int i;
    uint32_t count;

    memset(info, 0, sizeof(*info));
    for (i = 0; i < ARRAY_SIZE(info->family_index); ++i)
        info->family_index[i] = ~0u;

    VK_CALL(vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, NULL));
    if (!(queue_properties = vkd3d_calloc(count, sizeof(*queue_properties))))
        return E_OUTOFMEMORY;
    VK_CALL(vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, queue_properties));

    for (i = 0; i < count; ++i)
    {
        enum vkd3d_queue_family vkd3d_family = VKD3D_QUEUE_FAMILY_COUNT;

        /* Graphics + compute -> direct queue family. */
        if ((queue_properties[i].queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))
                == (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))
        {
            vkd3d_family = VKD3D_QUEUE_FAMILY_DIRECT;
        }
        /* Compute without graphics -> dedicated compute family. */
        if ((queue_properties[i].queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))
                == VK_QUEUE_COMPUTE_BIT)
        {
            vkd3d_family = VKD3D_QUEUE_FAMILY_COMPUTE;
        }
        /* Transfer-only (ignoring sparse binding) -> dedicated transfer
         * family. */
        if ((queue_properties[i].queueFlags & ~VK_QUEUE_SPARSE_BINDING_BIT) == VK_QUEUE_TRANSFER_BIT)
        {
            vkd3d_family = VKD3D_QUEUE_FAMILY_TRANSFER;
        }

        if (vkd3d_family == VKD3D_QUEUE_FAMILY_COUNT)
            continue;

        info->family_index[vkd3d_family] = i;
        info->vk_properties[vkd3d_family] = queue_properties[i];
        queue_info = &info->vk_queue_create_info[vkd3d_family];

        queue_info->sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
        queue_info->pNext = NULL;
        queue_info->flags = 0;
        queue_info->queueFamilyIndex = i;
        queue_info->queueCount = 1; /* FIXME: Use multiple queues. */
        queue_info->pQueuePriorities = queue_priorities;
    }

    vkd3d_free(queue_properties);

    if (info->family_index[VKD3D_QUEUE_FAMILY_DIRECT] == ~0u)
    {
        FIXME("Could not find a suitable queue family for a direct command queue.\n");
        return E_FAIL;
    }

    /* No compute-only queue family, reuse the direct queue family with graphics and compute. */
    if (info->family_index[VKD3D_QUEUE_FAMILY_COMPUTE] == ~0u)
    {
        info->family_index[VKD3D_QUEUE_FAMILY_COMPUTE] = info->family_index[VKD3D_QUEUE_FAMILY_DIRECT];
        info->vk_properties[VKD3D_QUEUE_FAMILY_COMPUTE] = info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT];
    }
    if (info->family_index[VKD3D_QUEUE_FAMILY_TRANSFER] == ~0u)
    {
        info->family_index[VKD3D_QUEUE_FAMILY_TRANSFER] = info->family_index[VKD3D_QUEUE_FAMILY_DIRECT];
        info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER] = info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT];
    }

    /* Compact the array. */
    /* Slot 0 (direct) is always populated here, so start compacting at 1;
     * entries that duplicated the direct family have queueCount 0. */
    info->vk_family_count = 1;
    for (i = info->vk_family_count; i < ARRAY_SIZE(info->vk_queue_create_info); ++i)
    {
        if (info->vk_queue_create_info[i].queueCount)
            info->vk_queue_create_info[info->vk_family_count++] = info->vk_queue_create_info[i];
    }

    return S_OK;
}

/* The 4 MiB alignment requirement for MSAA resources was lowered to 64KB on
 * hardware that supports it. This is distinct from the small MSAA requirement
 * which applies to resources of a total size of 4 MiB or less. */
static bool d3d12_is_64k_msaa_supported(struct d3d12_device *device)
{
    D3D12_RESOURCE_ALLOCATION_INFO info;
    D3D12_RESOURCE_DESC resource_desc;

    /* Probe with a 1024x1025 4xMSAA render target; the odd height keeps the
     * probe above the small-resource threshold. */
    memset(&resource_desc, 0, sizeof(resource_desc));
    resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    resource_desc.Width = 1024;
    resource_desc.Height = 1025;
    resource_desc.DepthOrArraySize = 1;
    resource_desc.MipLevels = 1;
    resource_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    resource_desc.SampleDesc.Count = 4;
    resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;

    /* FIXME: in some cases Vulkan requires 0x20000 or more for non-MSAA
     * resources, which must have 0x10000 in their description, so we might
     * reasonably return true here for 0x20000 or 0x40000.
     */
    return SUCCEEDED(vkd3d_get_image_allocation_info(device, &resource_desc, &info))
            && info.Alignment <= 0x10000;
}

/* Select a physical device, choose queue families, initialise the device
 * caps, and create the VkDevice together with the vkd3d queue wrappers.
 * VKD3D_VULKAN_DEVICE overrides the physical device choice. */
static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
        const struct vkd3d_device_create_info *create_info)
{
    const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs;
    const struct vkd3d_optional_device_extensions_info *optional_extensions;
    struct vkd3d_physical_device_info physical_device_info;
    struct vkd3d_device_queue_info device_queue_info;
    bool *user_extension_supported = NULL;
    VkPhysicalDevice physical_device;
    VkDeviceCreateInfo device_info;
    unsigned int device_index;
    uint32_t extension_count;
    const char **extensions;
    VkDevice vk_device;
    VkResult vr;
    HRESULT hr;

    TRACE("device %p, create_info %p.\n", device, create_info);

    /* The environment variable wins over any physical device passed in by
     * the caller. */
    physical_device = create_info->vk_physical_device;
    device_index = vkd3d_env_var_as_uint("VKD3D_VULKAN_DEVICE", ~0u);
    if ((!physical_device || device_index != ~0u)
            && FAILED(hr = vkd3d_select_physical_device(device->vkd3d_instance, device_index, &physical_device)))
        return hr;

    device->vk_physical_device = physical_device;

    if (FAILED(hr = vkd3d_select_queues(device->vkd3d_instance, physical_device, &device_queue_info)))
        return hr;

    TRACE("Using queue family %u for direct command queues.\n",
            device_queue_info.family_index[VKD3D_QUEUE_FAMILY_DIRECT]);
    TRACE("Using queue family %u for compute command queues.\n",
            device_queue_info.family_index[VKD3D_QUEUE_FAMILY_COMPUTE]);
    TRACE("Using queue family %u for copy command queues.\n",
            device_queue_info.family_index[VKD3D_QUEUE_FAMILY_TRANSFER]);

    VK_CALL(vkGetPhysicalDeviceMemoryProperties(physical_device, &device->memory_properties));

    vkd3d_physical_device_info_init(&physical_device_info, device);

    if (FAILED(hr = vkd3d_init_device_caps(device, create_info, &physical_device_info,
            &extension_count, &user_extension_supported)))
        return hr;

    if (!(extensions = vkd3d_calloc(extension_count, sizeof(*extensions))))
    {
        vkd3d_free(user_extension_supported);
        return E_OUTOFMEMORY;
    }

    optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO);

    /* Create device */
    device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    device_info.pNext = physical_device_info.features2.pNext;
    device_info.flags = 0;
    device_info.queueCreateInfoCount = device_queue_info.vk_family_count;
    device_info.pQueueCreateInfos = device_queue_info.vk_queue_create_info;
    device_info.enabledLayerCount = 0;
    device_info.ppEnabledLayerNames = NULL;
    device_info.enabledExtensionCount = vkd3d_enable_extensions(extensions,
            required_device_extensions, ARRAY_SIZE(required_device_extensions),
            optional_device_extensions, ARRAY_SIZE(optional_device_extensions),
            create_info->device_extensions, create_info->device_extension_count,
            optional_extensions ? optional_extensions->extensions : NULL,
            optional_extensions ? optional_extensions->extension_count : 0,
            user_extension_supported, &device->vk_info);
    device_info.ppEnabledExtensionNames = extensions;
    device_info.pEnabledFeatures = &physical_device_info.features2.features;
    vkd3d_free(user_extension_supported);

    vr = VK_CALL(vkCreateDevice(physical_device, &device_info, NULL, &vk_device));
    vkd3d_free(extensions);
    if (vr < 0)
    {
        ERR("Failed to create Vulkan device, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }

    if (FAILED(hr = vkd3d_load_vk_device_procs(&device->vk_procs, vk_procs, vk_device)))
    {
        ERR("Failed to load device procs, hr %#x.\n", hr);
        /* vkDestroyDevice itself may not have been loaded if proc loading
         * failed early. */
        if (device->vk_procs.vkDestroyDevice)
            device->vk_procs.vkDestroyDevice(vk_device, NULL);
        return hr;
    }

    device->vk_device = vk_device;

    if (FAILED(hr = d3d12_device_create_vkd3d_queues(device, &device_queue_info)))
    {
        ERR("Failed to create queues, hr %#x.\n", hr);
        device->vk_procs.vkDestroyDevice(vk_device, NULL);
        return hr;
    }

    /* Needs a live device, so this cap is determined after creation. */
    device->feature_options4.MSAA64KBAlignedTextureSupported = d3d12_is_64k_msaa_supported(device);

    TRACE("Created Vulkan device %p.\n", vk_device);

    return hr;
}

/* Initialise the device mutex and the Vulkan pipeline cache. A pipeline
 * cache creation failure is non-fatal: the cache handle is left as
 * VK_NULL_HANDLE and S_OK is still returned. */
static HRESULT d3d12_device_init_pipeline_cache(struct d3d12_device *device)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkPipelineCacheCreateInfo cache_info;
    VkResult vr;
    int rc;

    if ((rc = vkd3d_mutex_init(&device->mutex)))
    {
        ERR("Failed to initialize mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
    cache_info.pNext = NULL;
    cache_info.flags = 0;
    cache_info.initialDataSize = 0;
    cache_info.pInitialData = NULL;
    if ((vr = VK_CALL(vkCreatePipelineCache(device->vk_device, &cache_info, NULL,
            &device->vk_pipeline_cache))) < 0)
    {
        ERR("Failed to create Vulkan pipeline cache, vr %d.\n", vr);
        device->vk_pipeline_cache = VK_NULL_HANDLE;
    }

    return S_OK;
}

/* Counterpart of d3d12_device_init_pipeline_cache(). */
static void d3d12_device_destroy_pipeline_cache(struct d3d12_device *device)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;

    if (device->vk_pipeline_cache)
        VK_CALL(vkDestroyPipelineCache(device->vk_device, device->vk_pipeline_cache, NULL));

    vkd3d_mutex_destroy(&device->mutex);
}

/* GPU virtual address space layout: slab allocations live in
 * [VKD3D_VA_SLAB_BASE, …), one VKD3D_VA_SLAB_SIZE-sized region per slab;
 * everything at or above VKD3D_VA_FALLBACK_BASE is handled by the fallback
 * bump allocator. */
#define VKD3D_VA_FALLBACK_BASE 0x8000000000000000ull
#define VKD3D_VA_SLAB_BASE 0x0000001000000000ull
#define VKD3D_VA_SLAB_SIZE_SHIFT 32
#define VKD3D_VA_SLAB_SIZE (1ull << VKD3D_VA_SLAB_SIZE_SHIFT)
#define VKD3D_VA_SLAB_COUNT (64 * 1024)

/* Pop a slab from the free list and derive its virtual address from its
 * index. Caller must hold the allocator mutex and ensure free_slab is
 * non-NULL. */
static D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate_slab(struct vkd3d_gpu_va_allocator *allocator,
        size_t aligned_size, void *ptr)
{
    struct vkd3d_gpu_va_slab *slab;
    D3D12_GPU_VIRTUAL_ADDRESS address;
    unsigned slab_idx;

    slab = allocator->free_slab;
    allocator->free_slab = slab->ptr;
    slab->size = aligned_size;
    slab->ptr = ptr;

    /* It is critical that the multiplication happens in 64-bit to not
     * overflow.
     */
    slab_idx = slab - allocator->slabs;
    address = VKD3D_VA_SLAB_BASE + slab_idx * VKD3D_VA_SLAB_SIZE;

    TRACE("Allocated address %#"PRIx64", slab %u, size %zu.\n", address, slab_idx, aligned_size);

    return address;
}

/* Bump-allocate an address range above VKD3D_VA_FALLBACK_BASE and record it
 * in the sorted fallback_allocations array. Returns 0 when the address space
 * or the array cannot accommodate the request. Caller must hold the
 * allocator mutex. */
static D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate_fallback(struct vkd3d_gpu_va_allocator *allocator,
        size_t alignment, size_t aligned_size, void *ptr)
{
    struct vkd3d_gpu_va_allocation *allocation;
    D3D12_GPU_VIRTUAL_ADDRESS base, ceiling;

    /* Overflow-safe check that the aligned allocation fits below the top of
     * the address space. */
    base = allocator->fallback_floor;
    ceiling = ~(D3D12_GPU_VIRTUAL_ADDRESS)0;
    ceiling -= alignment - 1;
    if (aligned_size > ceiling || ceiling - aligned_size < base)
        return 0;

    base = (base + (alignment - 1)) & ~((D3D12_GPU_VIRTUAL_ADDRESS)alignment - 1);

    if (!vkd3d_array_reserve((void **)&allocator->fallback_allocations, &allocator->fallback_allocations_size,
            allocator->fallback_allocation_count + 1, sizeof(*allocator->fallback_allocations)))
        return 0;

    /* Appending keeps the array sorted because the floor only ever grows. */
    allocation = &allocator->fallback_allocations[allocator->fallback_allocation_count++];
    allocation->base = base;
    allocation->size = aligned_size;
    allocation->ptr = ptr;

    /* This pointer is bumped and never lowered on a free. However, this will
     * only fail once we have exhausted 63 bits of address space.
     */
    allocator->fallback_floor = base + aligned_size;

    TRACE("Allocated address %#"PRIx64", size %zu.\n", base, aligned_size);

    return base;
}

/* Allocate a GPU virtual address for ptr: use the fixed-size slab path when
 * the request fits and a free slab exists, otherwise the fallback bump
 * allocator. Returns 0 on failure. */
D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator,
        size_t alignment, size_t size, void *ptr)
{
    D3D12_GPU_VIRTUAL_ADDRESS address;
    int rc;

    /* Reject sizes whose alignment round-up would overflow size_t. */
    if (size > ~(size_t)0 - (alignment - 1))
        return 0;
    size = align(size, alignment);

    if ((rc = vkd3d_mutex_lock(&allocator->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return 0;
    }

    if (size <= VKD3D_VA_SLAB_SIZE && allocator->free_slab)
        address = vkd3d_gpu_va_allocator_allocate_slab(allocator, size, ptr);
    else
        address = vkd3d_gpu_va_allocator_allocate_fallback(allocator, alignment, size, ptr);

    vkd3d_mutex_unlock(&allocator->mutex);

    return address;
}

/* Map a slab-region address back to the pointer stored at allocation time.
 * Lock-less: see the comment in vkd3d_gpu_va_allocator_dereference(). */
static void *vkd3d_gpu_va_allocator_dereference_slab(struct vkd3d_gpu_va_allocator *allocator,
        D3D12_GPU_VIRTUAL_ADDRESS address)
{
    const struct vkd3d_gpu_va_slab *slab;
    D3D12_GPU_VIRTUAL_ADDRESS base_offset;
    unsigned int slab_idx;

    base_offset = address - VKD3D_VA_SLAB_BASE;
    slab_idx = base_offset >> VKD3D_VA_SLAB_SIZE_SHIFT;

    if (slab_idx >= VKD3D_VA_SLAB_COUNT)
    {
        ERR("Invalid slab index %u for address %#"PRIx64".\n", slab_idx, address);
        return NULL;
    }

    slab = &allocator->slabs[slab_idx];
    base_offset -= slab_idx * VKD3D_VA_SLAB_SIZE;
    if (base_offset >= slab->size)
    {
        ERR("Address %#"PRIx64" is %#"PRIx64" bytes into slab %u of size %zu.\n",
                address, base_offset, slab_idx, slab->size);
        return NULL;
    }
    return slab->ptr;
}

/* bsearch() comparator: matches any address inside [base, base + size). */
static int vkd3d_gpu_va_allocation_compare(const void *k, const void *e)
{
    const struct vkd3d_gpu_va_allocation *allocation = e;
    const D3D12_GPU_VIRTUAL_ADDRESS *address = k;

    if (*address < allocation->base)
        return -1;
    if (*address - allocation->base >= allocation->size)
        return 1;
    return 0;
}

static void
*vkd3d_gpu_va_allocator_dereference_fallback(struct vkd3d_gpu_va_allocator *allocator,
        D3D12_GPU_VIRTUAL_ADDRESS address)
{
    struct vkd3d_gpu_va_allocation *allocation;

    /* The fallback array stays sorted by base address, so an interval
     * bsearch() suffices. Caller holds the allocator mutex. */
    allocation = bsearch(&address, allocator->fallback_allocations, allocator->fallback_allocation_count,
            sizeof(*allocation), vkd3d_gpu_va_allocation_compare);

    return allocation ? allocation->ptr : NULL;
}

/* Resolve a GPU virtual address back to the pointer it was allocated for, or
 * NULL if it matches no live allocation. */
void *vkd3d_gpu_va_allocator_dereference(struct vkd3d_gpu_va_allocator *allocator,
        D3D12_GPU_VIRTUAL_ADDRESS address)
{
    void *ret;
    int rc;

    /* If we land in the non-fallback region, dereferencing VA is lock-less.
     * The base pointer is immutable, and the only way we can have a data race
     * is if some other thread is poking into the
     * slab_mem_allocation[base_index] block. This can only happen if someone
     * is trying to free the entry while we're dereferencing it, which would
     * be a serious application bug. */
    if (address < VKD3D_VA_FALLBACK_BASE)
        return vkd3d_gpu_va_allocator_dereference_slab(allocator, address);

    /* Slow fallback. */
    if ((rc = vkd3d_mutex_lock(&allocator->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return NULL;
    }

    ret = vkd3d_gpu_va_allocator_dereference_fallback(allocator, address);

    vkd3d_mutex_unlock(&allocator->mutex);

    return ret;
}

/* Return a slab to the free list. Caller must hold the allocator mutex. */
static void vkd3d_gpu_va_allocator_free_slab(struct vkd3d_gpu_va_allocator *allocator,
        D3D12_GPU_VIRTUAL_ADDRESS address)
{
    D3D12_GPU_VIRTUAL_ADDRESS base_offset;
    struct vkd3d_gpu_va_slab *slab;
    unsigned int slab_idx;

    base_offset = address - VKD3D_VA_SLAB_BASE;
    slab_idx = base_offset >> VKD3D_VA_SLAB_SIZE_SHIFT;

    if (slab_idx >= VKD3D_VA_SLAB_COUNT)
    {
        ERR("Invalid slab index %u for address %#"PRIx64".\n", slab_idx, address);
        return;
    }

    TRACE("Freeing address %#"PRIx64", slab %u.\n", address, slab_idx);

    slab = &allocator->slabs[slab_idx];
    slab->size = 0;
    slab->ptr = allocator->free_slab;
    allocator->free_slab = slab;
}

/* Remove a fallback allocation, keeping the array sorted by shifting the
 * tail down. The address must be the exact allocation base. Caller must hold
 * the allocator mutex. */
static void vkd3d_gpu_va_allocator_free_fallback(struct vkd3d_gpu_va_allocator *allocator,
        D3D12_GPU_VIRTUAL_ADDRESS address)
{
    struct vkd3d_gpu_va_allocation *allocation;
    unsigned int index;

    allocation = bsearch(&address, allocator->fallback_allocations, allocator->fallback_allocation_count,
            sizeof(*allocation), vkd3d_gpu_va_allocation_compare);

    if (!allocation || allocation->base != address)
    {
        ERR("Address %#"PRIx64" does not match any allocation.\n", address);
        return;
    }

    index = allocation - allocator->fallback_allocations;
    --allocator->fallback_allocation_count;
    if (index != allocator->fallback_allocation_count)
        memmove(&allocator->fallback_allocations[index], &allocator->fallback_allocations[index + 1],
                (allocator->fallback_allocation_count - index) * sizeof(*allocation));
}

/* Free a previously allocated GPU virtual address, dispatching on the region
 * it belongs to. */
void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address)
{
    int rc;

    if ((rc = vkd3d_mutex_lock(&allocator->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return;
    }

    if (address < VKD3D_VA_FALLBACK_BASE)
    {
        vkd3d_gpu_va_allocator_free_slab(allocator, address);
        vkd3d_mutex_unlock(&allocator->mutex);
        return;
    }

    vkd3d_gpu_va_allocator_free_fallback(allocator, address);

    vkd3d_mutex_unlock(&allocator->mutex);
}

/* Initialise the allocator: a fixed array of VKD3D_VA_SLAB_COUNT slabs
 * threaded into a free list, plus the fallback bump allocator. Returns false
 * on allocation or mutex failure. */
static bool vkd3d_gpu_va_allocator_init(struct vkd3d_gpu_va_allocator *allocator)
{
    unsigned int i;
    int rc;

    memset(allocator, 0, sizeof(*allocator));
    allocator->fallback_floor = VKD3D_VA_FALLBACK_BASE;

    /* To remain lock-less, we cannot grow the slabs array after the fact. If
     * we commit to a maximum number of allocations here, we can dereference
     * without taking a lock as the base pointer never changes. We would be
     * able to grow more seamlessly using an array of pointers, but that would
     * make dereferencing slightly less efficient. */
    if (!(allocator->slabs = vkd3d_calloc(VKD3D_VA_SLAB_COUNT, sizeof(*allocator->slabs))))
        return false;

    /* Mark all slabs as free. */
    allocator->free_slab = &allocator->slabs[0];
    for (i = 0; i < VKD3D_VA_SLAB_COUNT - 1; ++i)
    {
        allocator->slabs[i].ptr = &allocator->slabs[i + 1];
    }

    if ((rc = vkd3d_mutex_init(&allocator->mutex)))
    {
        ERR("Failed to initialize mutex, error %d.\n", rc);
        vkd3d_free(allocator->slabs);
        return false;
    }

    return true;
}

/* Release the allocator's storage. Takes the mutex to serialise with any
 * in-flight fallback operations before destroying it. */
static void vkd3d_gpu_va_allocator_cleanup(struct vkd3d_gpu_va_allocator *allocator)
{
    int rc;

    if ((rc = vkd3d_mutex_lock(&allocator->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return;
    }
    vkd3d_free(allocator->slabs);
    vkd3d_free(allocator->fallback_allocations);
    vkd3d_mutex_unlock(&allocator->mutex);
    vkd3d_mutex_destroy(&allocator->mutex);
}

/* We could use bsearch() or recursion here, but it probably helps to omit
 * all the extra function calls.
*/ +static struct vkd3d_gpu_descriptor_allocation *vkd3d_gpu_descriptor_allocator_binary_search( + const struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc) +{ + struct vkd3d_gpu_descriptor_allocation *allocations = allocator->allocations; + const struct d3d12_desc *base; + size_t centre, count; + + for (count = allocator->allocation_count; count > 1; ) + { + centre = count >> 1; + base = allocations[centre].base; + if (base <= desc) + { + allocations += centre; + count -= centre; + } + else + { + count = centre; + } + } + + return allocations; +} + +bool vkd3d_gpu_descriptor_allocator_register_range(struct vkd3d_gpu_descriptor_allocator *allocator, + const struct d3d12_desc *base, size_t count) +{ + struct vkd3d_gpu_descriptor_allocation *allocation; + int rc; + + if ((rc = vkd3d_mutex_lock(&allocator->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return false; + } + + if (!vkd3d_array_reserve((void **)&allocator->allocations, &allocator->allocations_size, + allocator->allocation_count + 1, sizeof(*allocator->allocations))) + { + vkd3d_mutex_unlock(&allocator->mutex); + return false; + } + + if (allocator->allocation_count > 1) + allocation = vkd3d_gpu_descriptor_allocator_binary_search(allocator, base); + else + allocation = allocator->allocations; + allocation += allocator->allocation_count && base > allocation->base; + memmove(&allocation[1], allocation, (allocator->allocation_count++ - (allocation - allocator->allocations)) + * sizeof(*allocation)); + + allocation->base = base; + allocation->count = count; + + vkd3d_mutex_unlock(&allocator->mutex); + + return true; +} + +bool vkd3d_gpu_descriptor_allocator_unregister_range( + struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *base) +{ + bool found; + size_t i; + int rc; + + if ((rc = vkd3d_mutex_lock(&allocator->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return false; + } + + for (i = 0, found = false; i < 
allocator->allocation_count; ++i)
    {
        if (allocator->allocations[i].base != base)
            continue;

        /* Close the gap left by the removed entry; the array stays sorted. */
        memmove(&allocator->allocations[i], &allocator->allocations[i + 1],
                (--allocator->allocation_count - i) * sizeof(allocator->allocations[0]));

        found = true;
        break;
    }

    vkd3d_mutex_unlock(&allocator->mutex);

    return found;
}

/* Map @desc to the registered allocation containing it, or NULL when @desc
 * does not fall inside any registered range. Caller must hold the mutex. */
static inline const struct vkd3d_gpu_descriptor_allocation *vkd3d_gpu_descriptor_allocator_allocation_from_descriptor(
        const struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc)
{
    const struct vkd3d_gpu_descriptor_allocation *allocation;

    allocation = vkd3d_gpu_descriptor_allocator_binary_search(allocator, desc);
    return (desc >= allocation->base && desc - allocation->base < allocation->count) ? allocation : NULL;
}

/* Return the available size from the specified descriptor to the heap end.
 * Returns 0 when @desc is not part of any registered range or when the lock
 * cannot be taken. */
size_t vkd3d_gpu_descriptor_allocator_range_size_from_descriptor(
        struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc)
{
    const struct vkd3d_gpu_descriptor_allocation *allocation;
    size_t remaining;
    int rc;

    assert(allocator->allocation_count);

    if ((rc = vkd3d_mutex_lock(&allocator->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return 0;
    }

    remaining = 0;
    if ((allocation = vkd3d_gpu_descriptor_allocator_allocation_from_descriptor(allocator, desc)))
        remaining = allocation->count - (desc - allocation->base);

    vkd3d_mutex_unlock(&allocator->mutex);

    return remaining;
}

/* Return the d3d12_descriptor_heap owning @desc, or NULL when @desc is not
 * inside any registered range. */
struct d3d12_descriptor_heap *vkd3d_gpu_descriptor_allocator_heap_from_descriptor(
        struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc)
{
    const struct vkd3d_gpu_descriptor_allocation *allocation;
    int rc;

    if (!allocator->allocation_count)
        return NULL;

    if ((rc = vkd3d_mutex_lock(&allocator->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return NULL;
    }

    allocation =
vkd3d_gpu_descriptor_allocator_allocation_from_descriptor(allocator, desc);

    vkd3d_mutex_unlock(&allocator->mutex);

    /* A registered base is the "descriptors" member of its heap, so
     * CONTAINING_RECORD() recovers the owning d3d12_descriptor_heap. */
    return allocation ? CONTAINING_RECORD(allocation->base, struct d3d12_descriptor_heap, descriptors)
            : NULL;
}

/* Initialise @allocator to an empty state; only the mutex can fail. */
static bool vkd3d_gpu_descriptor_allocator_init(struct vkd3d_gpu_descriptor_allocator *allocator)
{
    int rc;

    memset(allocator, 0, sizeof(*allocator));
    if ((rc = vkd3d_mutex_init(&allocator->mutex)))
    {
        ERR("Failed to initialise mutex, error %d.\n", rc);
        return false;
    }

    return true;
}

static void vkd3d_gpu_descriptor_allocator_cleanup(struct vkd3d_gpu_descriptor_allocator *allocator)
{
    vkd3d_free(allocator->allocations);
    vkd3d_mutex_destroy(&allocator->mutex);
}

/* Linear membership test: true iff @domain occurs in the first @count
 * entries of @domains. */
static bool have_vk_time_domain(VkTimeDomainEXT *domains, unsigned int count, VkTimeDomainEXT domain)
{
    unsigned int i;

    for (i = 0; i < count; ++i)
        if (domains[i] == domain)
            return true;

    return false;
}

/* Pick the host time domain used for calibrated timestamps. Leaves
 * device->vk_host_time_domain at -1 when VK_EXT_calibrated_timestamps is
 * missing, the device domain is unavailable, or no preferred host domain is
 * offered by the implementation. */
static void vkd3d_time_domains_init(struct d3d12_device *device)
{
    static const VkTimeDomainEXT host_time_domains[] =
    {
        /* In order of preference */
        VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
        VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
        VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT,

    };
    const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs;
    VkTimeDomainEXT domains[8];
    unsigned int i;
    uint32_t count;
    VkResult vr;

    device->vk_host_time_domain = -1;

    if (!device->vk_info.EXT_calibrated_timestamps)
        return;

    count = ARRAY_SIZE(domains);
    /* VK_INCOMPLETE merely means our fixed-size buffer was too small; the
     * entries we did receive are still usable. */
    if ((vr = VK_CALL(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(device->vk_physical_device,
            &count, domains))) != VK_SUCCESS && vr != VK_INCOMPLETE)
    {
        WARN("Failed to get calibrated time domains, vr %d.\n", vr);
        return;
    }

    if (vr == VK_INCOMPLETE)
        FIXME("Calibrated time domain list is incomplete.\n");

    if (!have_vk_time_domain(domains, count, VK_TIME_DOMAIN_DEVICE_EXT))
    {
        WARN("Device time domain not found. 
Calibrated timestamps will not be available.\n"); + return; + } + + for (i = 0; i < ARRAY_SIZE(host_time_domains); ++i) + { + if (!have_vk_time_domain(domains, count, host_time_domains[i])) + continue; + device->vk_host_time_domain = host_time_domains[i]; + break; + } + if (device->vk_host_time_domain == -1) + WARN("Found no acceptable host time domain. Calibrated timestamps will not be available.\n"); +} + +static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, + const struct vkd3d_device_descriptor_limits *limits) +{ + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + pool_sizes[1].descriptorCount = min(limits->sampled_image_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + pool_sizes[2].descriptorCount = pool_sizes[1].descriptorCount; + pool_sizes[3].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + pool_sizes[3].descriptorCount = min(limits->storage_image_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + pool_sizes[4].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + pool_sizes[4].descriptorCount = pool_sizes[3].descriptorCount; + pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; + pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); +}; + +/* ID3D12Device */ +static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface, + REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + if (IsEqualGUID(riid, &IID_ID3D12Device) + || IsEqualGUID(riid, &IID_ID3D12Object) + 
            || IsEqualGUID(riid, &IID_IUnknown))
    {
        ID3D12Device_AddRef(iface);
        *object = iface;
        return S_OK;
    }

    WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));

    *object = NULL;
    return E_NOINTERFACE;
}

static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);
    ULONG refcount = InterlockedIncrement(&device->refcount);

    TRACE("%p increasing refcount to %u.\n", device, refcount);

    return refcount;
}

/* Releases a device reference; when the count reaches zero the device and
 * all its subsystems are torn down. NOTE(review): the teardown sequence
 * below (caches and workers before queues, Vulkan device destruction after
 * all of them, instance reference released last) looks order-sensitive —
 * confirm before reordering. */
static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);
    ULONG refcount = InterlockedDecrement(&device->refcount);
    size_t i;

    TRACE("%p decreasing refcount to %u.\n", device, refcount);

    if (!refcount)
    {
        const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;

        vkd3d_private_store_destroy(&device->private_store);

        vkd3d_cleanup_format_info(device);
        vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device);
        vkd3d_destroy_null_resources(&device->null_resources, device);
        vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator);
        vkd3d_gpu_descriptor_allocator_cleanup(&device->gpu_descriptor_allocator);
        vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device);
        vkd3d_fence_worker_stop(&device->fence_worker, device);
        d3d12_device_destroy_pipeline_cache(device);
        d3d12_device_destroy_vkd3d_queues(device);
        for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i)
            vkd3d_mutex_destroy(&device->desc_mutex[i]);
        VK_CALL(vkDestroyDevice(device->vk_device, NULL));
        if (device->parent)
            IUnknown_Release(device->parent);
        vkd3d_instance_decref(device->vkd3d_instance);

        vkd3d_free(device);
    }

    return refcount;
}

static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface,
        REFGUID guid, UINT *data_size, void *data)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);

    TRACE("iface %p, guid %s, data_size %p, data %p.\n",
            iface, debugstr_guid(guid), data_size, data);

    return vkd3d_get_private_data(&device->private_store, guid, data_size, data);
}

/* The three private-data methods simply delegate to the device's
 * vkd3d_private_store. */
static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface,
        REFGUID guid, UINT data_size, const void *data)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);

    TRACE("iface %p, guid %s, data_size %u, data %p.\n",
            iface, debugstr_guid(guid), data_size, data);

    return vkd3d_set_private_data(&device->private_store, guid, data_size, data);
}

static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device *iface,
        REFGUID guid, const IUnknown *data)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);

    TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);

    return vkd3d_set_private_data_interface(&device->private_store, guid, data);
}

/* Forwards the debug name to the underlying VkDevice object. */
static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const WCHAR *name)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);

    TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size));

    return vkd3d_set_vk_object_name(device, (uint64_t)(uintptr_t)device->vk_device,
            VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name);
}

/* Multi-adapter is not supported; there is always exactly one node. */
static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device *iface)
{
    TRACE("iface %p.\n", iface);

    return 1;
}

/* Thin factory wrapper: create the queue object, then hand ownership to
 * return_interface() which performs the riid check. */
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *iface,
        const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);
    struct d3d12_command_queue *object;
    HRESULT hr;

    TRACE("iface %p, desc %p, riid %s, command_queue %p.\n",
            iface, desc, debugstr_guid(riid), command_queue);

    if (FAILED(hr = d3d12_command_queue_create(device, desc, &object)))
        return hr;

    return return_interface(&object->ID3D12CommandQueue_iface,
&IID_ID3D12CommandQueue,
            riid, command_queue);
}

/* Factory wrapper for command allocators; see CreateCommandQueue above. */
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device *iface,
        D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);
    struct d3d12_command_allocator *object;
    HRESULT hr;

    TRACE("iface %p, type %#x, riid %s, command_allocator %p.\n",
            iface, type, debugstr_guid(riid), command_allocator);

    if (FAILED(hr = d3d12_command_allocator_create(device, type, &object)))
        return hr;

    return return_interface(&object->ID3D12CommandAllocator_iface, &IID_ID3D12CommandAllocator,
            riid, command_allocator);
}

/* Factory wrapper for graphics pipeline state objects. */
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device *iface,
        const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);
    struct d3d12_pipeline_state *object;
    HRESULT hr;

    TRACE("iface %p, desc %p, riid %s, pipeline_state %p.\n",
            iface, desc, debugstr_guid(riid), pipeline_state);

    if (FAILED(hr = d3d12_pipeline_state_create_graphics(device, desc, &object)))
        return hr;

    return return_interface(&object->ID3D12PipelineState_iface,
            &IID_ID3D12PipelineState, riid, pipeline_state);
}

/* Factory wrapper for compute pipeline state objects. */
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device *iface,
        const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);
    struct d3d12_pipeline_state *object;
    HRESULT hr;

    TRACE("iface %p, desc %p, riid %s, pipeline_state %p.\n",
            iface, desc, debugstr_guid(riid), pipeline_state);

    if (FAILED(hr = d3d12_pipeline_state_create_compute(device, desc, &object)))
        return hr;

    return return_interface(&object->ID3D12PipelineState_iface,
            &IID_ID3D12PipelineState, riid, pipeline_state);
}

static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateCommandList(ID3D12Device *iface,
        UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator,
        ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);
    struct d3d12_command_list *object;
    HRESULT hr;

    TRACE("iface %p, node_mask 0x%08x, type %#x, command_allocator %p, "
            "initial_pipeline_state %p, riid %s, command_list %p.\n",
            iface, node_mask, type, command_allocator,
            initial_pipeline_state, debugstr_guid(riid), command_list);

    if (FAILED(hr = d3d12_command_list_create(device, node_mask, type, command_allocator,
            initial_pipeline_state, &object)))
        return hr;

    /* The created object exposes ID3D12GraphicsCommandList2; return_interface()
     * satisfies whatever compatible riid the caller asked for. */
    return return_interface(&object->ID3D12GraphicsCommandList2_iface,
            &IID_ID3D12GraphicsCommandList2, riid, command_list);
}

/* Direct3D feature levels restrict which formats can be optionally supported. */
static void vkd3d_restrict_format_support_for_feature_level(D3D12_FEATURE_DATA_FORMAT_SUPPORT *format_support)
{
    static const D3D12_FEATURE_DATA_FORMAT_SUPPORT blacklisted_format_features[] =
    {
        {DXGI_FORMAT_B8G8R8A8_TYPELESS, D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW,
                D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD | D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE},
        {DXGI_FORMAT_B8G8R8A8_UNORM, D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW,
                D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD | D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE},
    };
    unsigned int i;

    /* Mask out the blacklisted support bits for the matching format, if any. */
    for (i = 0; i < ARRAY_SIZE(blacklisted_format_features); ++i)
    {
        if (blacklisted_format_features[i].Format == format_support->Format)
        {
            format_support->Support1 &= ~blacklisted_format_features[i].Support1;
            format_support->Support2 &= ~blacklisted_format_features[i].Support2;
            break;
        }
    }
}

/* Implements the D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS query by probing
 * Vulkan image format properties. */
static HRESULT d3d12_device_check_multisample_quality_levels(struct d3d12_device *device,
        D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS *data)
{
    const struct vkd3d_vk_device_procs *vk_procs = 
&device->vk_procs;
    VkImageFormatProperties vk_properties;
    const struct vkd3d_format *format;
    VkSampleCountFlagBits vk_samples;
    VkImageUsageFlags vk_usage = 0;
    VkResult vr;

    TRACE("Format %#x, sample count %u, flags %#x.\n", data->Format, data->SampleCount, data->Flags);

    data->NumQualityLevels = 0;

    if (!(vk_samples = vk_samples_from_sample_count(data->SampleCount)))
        WARN("Invalid sample count %u.\n", data->SampleCount);
    if (!data->SampleCount)
        return E_FAIL;

    /* Single-sample is trivially supported. */
    if (data->SampleCount == 1)
    {
        data->NumQualityLevels = 1;
        goto done;
    }

    if (data->Format == DXGI_FORMAT_UNKNOWN)
        goto done;

    if (!(format = vkd3d_get_format(device, data->Format, false)))
        format = vkd3d_get_format(device, data->Format, true);
    if (!format)
    {
        FIXME("Unhandled format %#x.\n", data->Format);
        return E_INVALIDARG;
    }
    if (data->Flags)
        FIXME("Ignoring flags %#x.\n", data->Flags);

    if (format->vk_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT)
        vk_usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
    else
        vk_usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;

    vr = VK_CALL(vkGetPhysicalDeviceImageFormatProperties(device->vk_physical_device,
            format->vk_format, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, vk_usage, 0, &vk_properties));
    if (vr == VK_ERROR_FORMAT_NOT_SUPPORTED)
    {
        WARN("Format %#x is not supported.\n", format->dxgi_format);
        goto done;
    }
    if (vr < 0)
    {
        ERR("Failed to get image format properties, vr %d.\n", vr);
        return hresult_from_vk_result(vr);
    }

    /* vkd3d only ever reports a single quality level per sample count. */
    if (vk_properties.sampleCounts & vk_samples)
        data->NumQualityLevels = 1;

done:
    TRACE("Returning %u quality levels.\n", data->NumQualityLevels);
    return S_OK;
}

/* A device counts as UMA when every Vulkan memory type is host-visible;
 * continues below with the coherence check for *coherent. */
bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent)
{
    unsigned int i;

    if (coherent)
        *coherent = true;

    for (i = 0; i < device->memory_properties.memoryTypeCount; ++i)
    {
        if (!(device->memory_properties.memoryTypes[i].propertyFlags & 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) + return false; + if (coherent && !(device->memory_properties.memoryTypes[i].propertyFlags + & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) + *coherent = false; + } + + return true; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device *iface, + D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + + TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", + iface, feature, feature_data, feature_data_size); + + switch (feature) + { + case D3D12_FEATURE_D3D12_OPTIONS: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + *data = device->feature_options; + + TRACE("Double precision shader ops %#x.\n", data->DoublePrecisionFloatShaderOps); + TRACE("Output merger logic op %#x.\n", data->OutputMergerLogicOp); + TRACE("Shader min precision support %#x.\n", data->MinPrecisionSupport); + TRACE("Tiled resources tier %#x.\n", data->TiledResourcesTier); + TRACE("Resource binding tier %#x.\n", data->ResourceBindingTier); + TRACE("PS specified stencil ref %#x.\n", data->PSSpecifiedStencilRefSupported); + TRACE("Typed UAV load and additional formats %#x.\n", data->TypedUAVLoadAdditionalFormats); + TRACE("ROV %#x.\n", data->ROVsSupported); + TRACE("Conservative rasterization tier %#x.\n", data->ConservativeRasterizationTier); + TRACE("Max GPU virtual address bits per resource %u.\n", data->MaxGPUVirtualAddressBitsPerResource); + TRACE("Standard swizzle 64KB %#x.\n", data->StandardSwizzle64KBSupported); + TRACE("Cross-node sharing tier %#x.\n", data->CrossNodeSharingTier); + TRACE("Cross-adapter row-major texture %#x.\n", data->CrossAdapterRowMajorTextureSupported); + TRACE("VP and RT array index from any shader without GS emulation %#x.\n", + 
data->VPAndRTArrayIndexFromAnyShaderFeedingRasterizerSupportedWithoutGSEmulation); + TRACE("Resource heap tier %#x.\n", data->ResourceHeapTier); + return S_OK; + } + + case D3D12_FEATURE_ARCHITECTURE: + { + D3D12_FEATURE_DATA_ARCHITECTURE *data = feature_data; + bool coherent; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + if (data->NodeIndex) + { + FIXME("Multi-adapter not supported.\n"); + return E_INVALIDARG; + } + + WARN("Assuming device does not support tile based rendering.\n"); + data->TileBasedRenderer = FALSE; + + data->UMA = d3d12_device_is_uma(device, &coherent); + data->CacheCoherentUMA = data->UMA && coherent; + + TRACE("Tile based renderer %#x, UMA %#x, cache coherent UMA %#x.\n", + data->TileBasedRenderer, data->UMA, data->CacheCoherentUMA); + return S_OK; + } + + case D3D12_FEATURE_FEATURE_LEVELS: + { + struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; + D3D12_FEATURE_DATA_FEATURE_LEVELS *data = feature_data; + unsigned int i; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + if (!data->NumFeatureLevels) + return E_INVALIDARG; + + data->MaxSupportedFeatureLevel = 0; + for (i = 0; i < data->NumFeatureLevels; ++i) + { + D3D_FEATURE_LEVEL fl = data->pFeatureLevelsRequested[i]; + if (data->MaxSupportedFeatureLevel < fl && fl <= vulkan_info->max_feature_level) + data->MaxSupportedFeatureLevel = fl; + } + + TRACE("Max supported feature level %#x.\n", data->MaxSupportedFeatureLevel); + return S_OK; + } + + case D3D12_FEATURE_FORMAT_SUPPORT: + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + D3D12_FEATURE_DATA_FORMAT_SUPPORT *data = feature_data; + VkFormatFeatureFlagBits image_features; + const struct vkd3d_format *format; + VkFormatProperties properties; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return 
E_INVALIDARG; + } + + data->Support1 = D3D12_FORMAT_SUPPORT1_NONE; + data->Support2 = D3D12_FORMAT_SUPPORT2_NONE; + if (!(format = vkd3d_get_format(device, data->Format, false))) + format = vkd3d_get_format(device, data->Format, true); + if (!format) + { + FIXME("Unhandled format %#x.\n", data->Format); + return E_INVALIDARG; + } + + VK_CALL(vkGetPhysicalDeviceFormatProperties(device->vk_physical_device, format->vk_format, &properties)); + image_features = properties.linearTilingFeatures | properties.optimalTilingFeatures; + + if (properties.bufferFeatures) + data->Support1 |= D3D12_FORMAT_SUPPORT1_BUFFER; + if (properties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) + data->Support1 |= D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER; + if (data->Format == DXGI_FORMAT_R16_UINT || data->Format == DXGI_FORMAT_R32_UINT) + data->Support1 |= D3D12_FORMAT_SUPPORT1_IA_INDEX_BUFFER; + if (image_features) + data->Support1 |= D3D12_FORMAT_SUPPORT1_TEXTURE1D | D3D12_FORMAT_SUPPORT1_TEXTURE2D + | D3D12_FORMAT_SUPPORT1_TEXTURE3D | D3D12_FORMAT_SUPPORT1_TEXTURECUBE; + if (image_features & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + { + data->Support1 |= D3D12_FORMAT_SUPPORT1_SHADER_LOAD | D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD + | D3D12_FORMAT_SUPPORT1_SHADER_GATHER; + if (image_features & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) + { + data->Support1 |= D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE + | D3D12_FORMAT_SUPPORT1_MIP; + } + if (format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + data->Support1 |= D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON + | D3D12_FORMAT_SUPPORT1_SHADER_GATHER_COMPARISON; + } + if (image_features & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) + data->Support1 |= D3D12_FORMAT_SUPPORT1_RENDER_TARGET | D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET; + if (image_features & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT) + data->Support1 |= D3D12_FORMAT_SUPPORT1_BLENDABLE; + if (image_features & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) + data->Support1 
|= D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL; + if (image_features & VK_FORMAT_FEATURE_BLIT_SRC_BIT) + data->Support1 |= D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE; + if (image_features & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) + data->Support1 |= D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW; + + if (image_features & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT) + data->Support2 |= D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD + | D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS + | D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE + | D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE + | D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX + | D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX; + + vkd3d_restrict_format_support_for_feature_level(data); + + TRACE("Format %#x, support1 %#x, support2 %#x.\n", data->Format, data->Support1, data->Support2); + return S_OK; + } + + case D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS: + { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + return d3d12_device_check_multisample_quality_levels(device, data); + } + + case D3D12_FEATURE_FORMAT_INFO: + { + D3D12_FEATURE_DATA_FORMAT_INFO *data = feature_data; + const struct vkd3d_format *format; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + if (data->Format == DXGI_FORMAT_UNKNOWN) + { + data->PlaneCount = 1; + return S_OK; + } + + if (!(format = vkd3d_get_format(device, data->Format, false))) + format = vkd3d_get_format(device, data->Format, true); + if (!format) + { + FIXME("Unhandled format %#x.\n", data->Format); + return E_INVALIDARG; + } + + data->PlaneCount = format->plane_count; + + TRACE("Format %#x, plane count %"PRIu8".\n", data->Format, data->PlaneCount); + return S_OK; + } + + case D3D12_FEATURE_GPU_VIRTUAL_ADDRESS_SUPPORT: + { + const 
D3D12_FEATURE_DATA_D3D12_OPTIONS *options = &device->feature_options; + D3D12_FEATURE_DATA_GPU_VIRTUAL_ADDRESS_SUPPORT *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + data->MaxGPUVirtualAddressBitsPerResource = options->MaxGPUVirtualAddressBitsPerResource; + data->MaxGPUVirtualAddressBitsPerProcess = options->MaxGPUVirtualAddressBitsPerResource; + + TRACE("Max GPU virtual address bits per resource %u, Max GPU virtual address bits per process %u.\n", + data->MaxGPUVirtualAddressBitsPerResource, data->MaxGPUVirtualAddressBitsPerProcess); + return S_OK; + } + + case D3D12_FEATURE_SHADER_MODEL: + { + D3D12_FEATURE_DATA_SHADER_MODEL *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + TRACE("Request shader model %#x.\n", data->HighestShaderModel); + + data->HighestShaderModel = D3D_SHADER_MODEL_5_1; + + TRACE("Shader model %#x.\n", data->HighestShaderModel); + return S_OK; + } + + case D3D12_FEATURE_D3D12_OPTIONS1: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS1 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + *data = device->feature_options1; + + TRACE("Wave ops %#x.\n", data->WaveOps); + TRACE("Min wave lane count %#x.\n", data->WaveLaneCountMin); + TRACE("Max wave lane count %#x.\n", data->WaveLaneCountMax); + TRACE("Total lane count %#x.\n", data->TotalLaneCount); + TRACE("Expanded compute resource states %#x.\n", data->ExpandedComputeResourceStates); + TRACE("Int64 shader ops %#x.\n", data->Int64ShaderOps); + return S_OK; + } + + case D3D12_FEATURE_ROOT_SIGNATURE: + { + D3D12_FEATURE_DATA_ROOT_SIGNATURE *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + TRACE("Root signature 
requested %#x.\n", data->HighestVersion); + data->HighestVersion = min(data->HighestVersion, D3D_ROOT_SIGNATURE_VERSION_1_1); + if (device->vkd3d_instance->api_version < VKD3D_API_VERSION_1_2) + data->HighestVersion = min(data->HighestVersion, D3D_ROOT_SIGNATURE_VERSION_1_0); + + TRACE("Root signature version %#x.\n", data->HighestVersion); + return S_OK; + } + + case D3D12_FEATURE_ARCHITECTURE1: + { + D3D12_FEATURE_DATA_ARCHITECTURE1 *data = feature_data; + bool coherent; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + if (data->NodeIndex) + { + FIXME("Multi-adapter not supported.\n"); + return E_INVALIDARG; + } + + WARN("Assuming device does not support tile based rendering.\n"); + data->TileBasedRenderer = FALSE; + + data->UMA = d3d12_device_is_uma(device, &coherent); + data->CacheCoherentUMA = data->UMA && coherent; + + WARN("Assuming device does not have an isolated memory management unit.\n"); + data->IsolatedMMU = FALSE; + + TRACE("Tile based renderer %#x, UMA %#x, cache coherent UMA %#x, isolated MMU %#x.\n", + data->TileBasedRenderer, data->UMA, data->CacheCoherentUMA, data->IsolatedMMU); + return S_OK; + } + + case D3D12_FEATURE_D3D12_OPTIONS2: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS2 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + *data = device->feature_options2; + + TRACE("Depth bounds test %#x.\n", data->DepthBoundsTestSupported); + TRACE("Programmable sample positions tier %#x.\n", data->ProgrammableSamplePositionsTier); + return S_OK; + } + + case D3D12_FEATURE_SHADER_CACHE: + { + D3D12_FEATURE_DATA_SHADER_CACHE *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + /* FIXME: The d3d12 documentation states that + * D3D12_SHADER_CACHE_SUPPORT_SINGLE_PSO is always supported, but + 
* the CachedPSO field of D3D12_GRAPHICS_PIPELINE_STATE_DESC is + * ignored and GetCachedBlob() is a stub. */ + data->SupportFlags = D3D12_SHADER_CACHE_SUPPORT_NONE; + + TRACE("Shader cache support %#x.\n", data->SupportFlags); + return S_OK; + } + + case D3D12_FEATURE_COMMAND_QUEUE_PRIORITY: + { + D3D12_FEATURE_DATA_COMMAND_QUEUE_PRIORITY *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + switch (data->CommandListType) + { + case D3D12_COMMAND_LIST_TYPE_DIRECT: + case D3D12_COMMAND_LIST_TYPE_COMPUTE: + case D3D12_COMMAND_LIST_TYPE_COPY: + data->PriorityForTypeIsSupported = FALSE; + TRACE("Command list type %#x, priority %u, supported %#x.\n", + data->CommandListType, data->Priority, data->PriorityForTypeIsSupported); + return S_OK; + + default: + FIXME("Unhandled command list type %#x.\n", data->CommandListType); + return E_INVALIDARG; + } + } + + case D3D12_FEATURE_D3D12_OPTIONS3: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS3 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + *data = device->feature_options3; + + TRACE("Copy queue timestamp queries %#x.\n", data->CopyQueueTimestampQueriesSupported); + TRACE("Casting fully typed format %#x.\n", data->CastingFullyTypedFormatSupported); + TRACE("Write buffer immediate %#x.\n", data->WriteBufferImmediateSupportFlags); + TRACE("View instancing tier %#x.\n", data->ViewInstancingTier); + TRACE("Barycentrics %#x.\n", data->BarycentricsSupported); + return S_OK; + } + + case D3D12_FEATURE_EXISTING_HEAPS: + { + D3D12_FEATURE_DATA_EXISTING_HEAPS *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + data->Supported = FALSE; + + TRACE("Existing heaps %#x.\n", data->Supported); + return S_OK; + } + + case D3D12_FEATURE_D3D12_OPTIONS4: + { + 
D3D12_FEATURE_DATA_D3D12_OPTIONS4 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + *data = device->feature_options4; + + TRACE("64 KiB aligned MSAA textures %#x.\n", data->MSAA64KBAlignedTextureSupported); + TRACE("Shared resource compatibility tier %#x.\n", data->SharedResourceCompatibilityTier); + TRACE("Native 16-bit shader ops %#x.\n", data->Native16BitShaderOpsSupported); + return S_OK; + } + + case D3D12_FEATURE_SERIALIZATION: + { + D3D12_FEATURE_DATA_SERIALIZATION *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + if (data->NodeIndex) + { + FIXME("Multi-adapter not supported.\n"); + return E_INVALIDARG; + } + + data->HeapSerializationTier = D3D12_HEAP_SERIALIZATION_TIER_0; + + TRACE("Heap serialisation tier %#x.\n", data->HeapSerializationTier); + return S_OK; + } + + case D3D12_FEATURE_CROSS_NODE: + { + D3D12_FEATURE_DATA_CROSS_NODE *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + data->SharingTier = device->feature_options.CrossNodeSharingTier; + data->AtomicShaderInstructions = FALSE; + + TRACE("Cross node sharing tier %#x.\n", data->SharingTier); + TRACE("Cross node shader atomics %#x.\n", data->AtomicShaderInstructions); + return S_OK; + } + + case D3D12_FEATURE_D3D12_OPTIONS5: + { + D3D12_FEATURE_DATA_D3D12_OPTIONS5 *data = feature_data; + + if (feature_data_size != sizeof(*data)) + { + WARN("Invalid size %u.\n", feature_data_size); + return E_INVALIDARG; + } + + *data = device->feature_options5; + + TRACE("SRV tiled resource tier 3 only %#x.\n", data->SRVOnlyTiledResourceTier3); + TRACE("Render pass tier %#x.\n", data->RenderPassesTier); + TRACE("Ray tracing tier %#x.\n", data->RaytracingTier); + return S_OK; + } + + default: + FIXME("Unhandled feature 
%#x.\n", feature);
            return E_NOTIMPL;
    }
}

/* Factory wrapper for descriptor heaps. */
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device *iface,
        const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);
    struct d3d12_descriptor_heap *object;
    HRESULT hr;

    TRACE("iface %p, desc %p, riid %s, descriptor_heap %p.\n",
            iface, desc, debugstr_guid(riid), descriptor_heap);

    if (FAILED(hr = d3d12_descriptor_heap_create(device, desc, &object)))
        return hr;

    return return_interface(&object->ID3D12DescriptorHeap_iface,
            &IID_ID3D12DescriptorHeap, riid, descriptor_heap);
}

/* The handle increment is the size of vkd3d's CPU-side descriptor struct
 * for the heap type; 0 is returned for unknown types. */
static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device *iface,
        D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type)
{
    TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type);

    switch (descriptor_heap_type)
    {
        case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV:
        case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER:
            return sizeof(struct d3d12_desc);

        case D3D12_DESCRIPTOR_HEAP_TYPE_RTV:
            return sizeof(struct d3d12_rtv_desc);

        case D3D12_DESCRIPTOR_HEAP_TYPE_DSV:
            return sizeof(struct d3d12_dsv_desc);

        default:
            FIXME("Unhandled type %#x.\n", descriptor_heap_type);
            return 0;
    }
}

/* Factory wrapper for root signatures; node_mask is ignored (single node). */
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device *iface,
        UINT node_mask, const void *bytecode, SIZE_T bytecode_length,
        REFIID riid, void **root_signature)
{
    struct d3d12_device *device = impl_from_ID3D12Device(iface);
    struct d3d12_root_signature *object;
    HRESULT hr;

    TRACE("iface %p, node_mask 0x%08x, bytecode %p, bytecode_length %lu, riid %s, root_signature %p.\n",
            iface, node_mask, bytecode, bytecode_length, debugstr_guid(riid), root_signature);

    debug_ignored_node_mask(node_mask);

    if (FAILED(hr = d3d12_root_signature_create(device, bytecode, bytecode_length, &object)))
        return hr;

    return 
return_interface(&object->ID3D12RootSignature_iface, + &IID_ID3D12RootSignature, riid, root_signature); +} + +static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device *iface, + const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); + + d3d12_desc_create_cbv(&tmp, device, desc); + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); +} + +static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, + ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", + iface, resource, desc, descriptor.ptr); + + d3d12_desc_create_srv(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), desc); + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); +} + +static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, + ID3D12Resource *resource, ID3D12Resource *counter_resource, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", + iface, resource, counter_resource, desc, descriptor.ptr); + + d3d12_desc_create_uav(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), + unsafe_impl_from_ID3D12Resource(counter_resource), desc); + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); +} + +static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, + ID3D12Resource 
*resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) +{ + TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", + iface, resource, desc, descriptor.ptr); + + d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), + impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); +} + +static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device *iface, + ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) +{ + TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", + iface, resource, desc, descriptor.ptr); + + d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), + impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); +} + +static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, + const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); + + d3d12_desc_create_sampler(&tmp, device, desc); + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); +} + +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, + const UINT *dst_descriptor_range_sizes, + UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, + const UINT *src_descriptor_range_sizes, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; + unsigned int dst_range_size, src_range_size; + const struct d3d12_desc *src; + struct d3d12_desc *dst; + + TRACE("iface %p, 
dst_descriptor_range_count %u, dst_descriptor_range_offsets %p, " + "dst_descriptor_range_sizes %p, src_descriptor_range_count %u, " + "src_descriptor_range_offsets %p, src_descriptor_range_sizes %p, " + "descriptor_heap_type %#x.\n", + iface, dst_descriptor_range_count, dst_descriptor_range_offsets, + dst_descriptor_range_sizes, src_descriptor_range_count, src_descriptor_range_offsets, + src_descriptor_range_sizes, descriptor_heap_type); + + if (descriptor_heap_type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + && descriptor_heap_type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) + { + FIXME("Unhandled descriptor heap type %#x.\n", descriptor_heap_type); + return; + } + + dst_range_idx = dst_idx = 0; + src_range_idx = src_idx = 0; + while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) + { + dst_range_size = dst_descriptor_range_sizes ? dst_descriptor_range_sizes[dst_range_idx] : 1; + src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; + + dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); + src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); + + while (dst_idx < dst_range_size && src_idx < src_range_size) + d3d12_desc_copy(&dst[dst_idx++], &src[src_idx++], device); + + if (dst_idx >= dst_range_size) + { + ++dst_range_idx; + dst_idx = 0; + } + if (src_idx >= src_range_size) + { + ++src_range_idx; + src_idx = 0; + } + } +} + +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *iface, + UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, + const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) +{ + TRACE("iface %p, descriptor_count %u, dst_descriptor_range_offset %#lx, " + "src_descriptor_range_offset %#lx, descriptor_heap_type %#x.\n", + iface, descriptor_count, dst_descriptor_range_offset.ptr, 
src_descriptor_range_offset.ptr, + descriptor_heap_type); + + d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count, + 1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type); +} + +static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( + ID3D12Device *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + UINT count, const D3D12_RESOURCE_DESC *resource_descs) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + const D3D12_RESOURCE_DESC *desc; + uint64_t requested_alignment; + + TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p.\n", + iface, info, visible_mask, count, resource_descs); + + debug_ignored_node_mask(visible_mask); + + info->SizeInBytes = 0; + info->Alignment = 0; + + if (count != 1) + { + FIXME("Multiple resource descriptions not supported.\n"); + return info; + } + + desc = &resource_descs[0]; + + if (FAILED(d3d12_resource_validate_desc(desc, device))) + { + WARN("Invalid resource desc.\n"); + goto invalid; + } + + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + info->SizeInBytes = align(desc->Width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); + info->Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + } + else + { + if (FAILED(vkd3d_get_image_allocation_info(device, desc, info))) + { + WARN("Failed to get allocation info for texture.\n"); + goto invalid; + } + + requested_alignment = desc->Alignment + ? desc->Alignment : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + info->Alignment = max(info->Alignment, requested_alignment); + + info->SizeInBytes = align(info->SizeInBytes, info->Alignment); + + /* Pad by the maximum heap offset increase which may be needed to align to a higher + * Vulkan requirement an offset supplied by the calling application. This allows + * us to return the standard D3D12 alignment and adjust resource placement later. 
*/ + if (info->Alignment > requested_alignment) + { + info->SizeInBytes += info->Alignment - requested_alignment; + info->Alignment = requested_alignment; + } + } + + TRACE("Size %#"PRIx64", alignment %#"PRIx64".\n", info->SizeInBytes, info->Alignment); + + return info; + +invalid: + info->SizeInBytes = ~(uint64_t)0; + + /* FIXME: Should we support D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT for small MSSA resources? */ + if (desc->SampleDesc.Count != 1) + info->Alignment = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT; + else + info->Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + + TRACE("Alignment %#"PRIx64".\n", info->Alignment); + + return info; +} + +static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device *iface, + D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + bool coherent; + + TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", + iface, heap_properties, node_mask, heap_type); + + debug_ignored_node_mask(node_mask); + + heap_properties->Type = D3D12_HEAP_TYPE_CUSTOM; + + switch (heap_type) + { + case D3D12_HEAP_TYPE_DEFAULT: + heap_properties->CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE; + heap_properties->MemoryPoolPreference = d3d12_device_is_uma(device, NULL) + ? D3D12_MEMORY_POOL_L0 : D3D12_MEMORY_POOL_L1; + break; + + case D3D12_HEAP_TYPE_UPLOAD: + heap_properties->CPUPageProperty = d3d12_device_is_uma(device, &coherent) && coherent + ? 
D3D12_CPU_PAGE_PROPERTY_WRITE_BACK : D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; + heap_properties->MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + break; + + case D3D12_HEAP_TYPE_READBACK: + heap_properties->CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; + heap_properties->MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + break; + + default: + FIXME("Unhandled heap type %#x.\n", heap_type); + break; + }; + + heap_properties->CreationNodeMask = 1; + heap_properties->VisibleNodeMask = 1; + + return heap_properties; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device *iface, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_resource *object; + HRESULT hr; + + TRACE("iface %p, heap_properties %p, heap_flags %#x, desc %p, initial_state %#x, " + "optimized_clear_value %p, iid %s, resource %p.\n", + iface, heap_properties, heap_flags, desc, initial_state, + optimized_clear_value, debugstr_guid(iid), resource); + + if (FAILED(hr = d3d12_committed_resource_create(device, heap_properties, heap_flags, + desc, initial_state, optimized_clear_value, &object))) + { + *resource = NULL; + return hr; + } + + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, + const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_heap *object; + HRESULT hr; + + TRACE("iface %p, desc %p, iid %s, heap %p.\n", + iface, desc, debugstr_guid(iid), heap); + + if (FAILED(hr = d3d12_heap_create(device, desc, NULL, &object))) + { + *heap = NULL; + return hr; + } + + return return_interface(&object->ID3D12Heap_iface, 
&IID_ID3D12Heap, iid, heap); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device *iface, + ID3D12Heap *heap, UINT64 heap_offset, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_heap *heap_object; + struct d3d12_resource *object; + HRESULT hr; + + TRACE("iface %p, heap %p, heap_offset %#"PRIx64", desc %p, initial_state %#x, " + "optimized_clear_value %p, iid %s, resource %p.\n", + iface, heap, heap_offset, desc, initial_state, + optimized_clear_value, debugstr_guid(iid), resource); + + heap_object = unsafe_impl_from_ID3D12Heap(heap); + + if (FAILED(hr = d3d12_placed_resource_create(device, heap_object, heap_offset, + desc, initial_state, optimized_clear_value, &object))) + return hr; + + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device *iface, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_resource *object; + HRESULT hr; + + TRACE("iface %p, desc %p, initial_state %#x, optimized_clear_value %p, iid %s, resource %p.\n", + iface, desc, initial_state, optimized_clear_value, debugstr_guid(iid), resource); + + if (FAILED(hr = d3d12_reserved_resource_create(device, + desc, initial_state, optimized_clear_value, &object))) + return hr; + + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *iface, + ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, + const WCHAR *name, HANDLE *handle) +{ + struct 
d3d12_device *device = impl_from_ID3D12Device(iface); + + FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", + iface, object, attributes, access, debugstr_w(name, device->wchar_size), handle); + + return E_NOTIMPL; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *iface, + HANDLE handle, REFIID riid, void **object) +{ + FIXME("iface %p, handle %p, riid %s, object %p stub!\n", + iface, handle, debugstr_guid(riid), object); + + return E_NOTIMPL; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device *iface, + const WCHAR *name, DWORD access, HANDLE *handle) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + + FIXME("iface %p, name %s, access %#x, handle %p stub!\n", + iface, debugstr_w(name, device->wchar_size), access, handle); + + return E_NOTIMPL; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, + UINT object_count, ID3D12Pageable * const *objects) +{ + FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", + iface, object_count, objects); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, + UINT object_count, ID3D12Pageable * const *objects) +{ + FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", + iface, object_count, objects); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, + UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_fence *object; + HRESULT hr; + + TRACE("iface %p, intial_value %#"PRIx64", flags %#x, riid %s, fence %p.\n", + iface, initial_value, flags, debugstr_guid(riid), fence); + + if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object))) + return hr; + + return return_interface(&object->ID3D12Fence_iface, &IID_ID3D12Fence, riid, fence); +} + +static HRESULT 
STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device *iface) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + + TRACE("iface %p.\n", iface); + + return device->removed_reason; +} + +static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *iface, + const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, + UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, + UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + + unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; + unsigned int width, height, depth, plane_count, sub_resources_per_plane; + const struct vkd3d_format *format; + uint64_t offset, size, total; + + TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " + "layouts %p, row_counts %p, row_sizes %p, total_bytes %p.\n", + iface, desc, first_sub_resource, sub_resource_count, base_offset, + layouts, row_counts, row_sizes, total_bytes); + + if (layouts) + memset(layouts, 0xff, sizeof(*layouts) * sub_resource_count); + if (row_counts) + memset(row_counts, 0xff, sizeof(*row_counts) * sub_resource_count); + if (row_sizes) + memset(row_sizes, 0xff, sizeof(*row_sizes) * sub_resource_count); + if (total_bytes) + *total_bytes = ~(uint64_t)0; + + if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) + { + WARN("Invalid format %#x.\n", desc->Format); + return; + } + + if (FAILED(d3d12_resource_validate_desc(desc, device))) + { + WARN("Invalid resource desc.\n"); + return; + } + + plane_count = ((format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + && (format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) ? 
2 : 1; + sub_resources_per_plane = d3d12_resource_desc_get_sub_resource_count(desc); + + if (!vkd3d_bound_range(first_sub_resource, sub_resource_count, sub_resources_per_plane * plane_count)) + { + WARN("Invalid sub-resource range %u-%u for resource.\n", first_sub_resource, sub_resource_count); + return; + } + + offset = 0; + total = 0; + for (i = 0; i < sub_resource_count; ++i) + { + sub_resource_idx = (first_sub_resource + i) % sub_resources_per_plane; + miplevel_idx = sub_resource_idx % desc->MipLevels; + width = align(d3d12_resource_desc_get_width(desc, miplevel_idx), format->block_width); + height = align(d3d12_resource_desc_get_height(desc, miplevel_idx), format->block_height); + depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); + row_count = height / format->block_height; + row_size = (width / format->block_width) * format->byte_count * format->block_byte_count; + row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + + if (layouts) + { + layouts[i].Offset = base_offset + offset; + layouts[i].Footprint.Format = desc->Format; + layouts[i].Footprint.Width = width; + layouts[i].Footprint.Height = height; + layouts[i].Footprint.Depth = depth; + layouts[i].Footprint.RowPitch = row_pitch; + } + if (row_counts) + row_counts[i] = row_count; + if (row_sizes) + row_sizes[i] = row_size; + + size = max(0, row_count - 1) * row_pitch + row_size; + size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + size; + + total = offset + size; + offset = align(total, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + } + if (total_bytes) + *total_bytes = total; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *iface, + const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_query_heap *object; + HRESULT hr; + + TRACE("iface %p, desc %p, iid %s, heap %p.\n", + iface, desc, debugstr_guid(iid), heap); + + if (FAILED(hr = 
d3d12_query_heap_create(device, desc, &object))) + return hr; + + return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device *iface, BOOL enable) +{ + FIXME("iface %p, enable %#x stub!\n", iface, enable); + + return E_NOTIMPL; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device *iface, + const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, + REFIID iid, void **command_signature) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_command_signature *object; + HRESULT hr; + + TRACE("iface %p, desc %p, root_signature %p, iid %s, command_signature %p.\n", + iface, desc, root_signature, debugstr_guid(iid), command_signature); + + if (FAILED(hr = d3d12_command_signature_create(device, desc, &object))) + return hr; + + return return_interface(&object->ID3D12CommandSignature_iface, + &IID_ID3D12CommandSignature, iid, command_signature); +} + +static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface, + ID3D12Resource *resource, UINT *total_tile_count, + D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, + D3D12_SUBRESOURCE_TILING *sub_resource_tilings) +{ + FIXME("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " + "standard_title_shape %p, sub_resource_tiling_count %p, " + "first_sub_resource_tiling %u, sub_resource_tilings %p stub!\n", + iface, resource, total_tile_count, packed_mip_info, standard_tile_shape, + sub_resource_tiling_count, first_sub_resource_tiling, + sub_resource_tilings); +} + +static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, LUID *luid) +{ + struct d3d12_device *device = impl_from_ID3D12Device(iface); + + TRACE("iface %p, luid %p.\n", iface, luid); + + *luid = device->adapter_luid; + + 
return luid; +} + +static const struct ID3D12DeviceVtbl d3d12_device_vtbl = +{ + /* IUnknown methods */ + d3d12_device_QueryInterface, + d3d12_device_AddRef, + d3d12_device_Release, + /* ID3D12Object methods */ + d3d12_device_GetPrivateData, + d3d12_device_SetPrivateData, + d3d12_device_SetPrivateDataInterface, + d3d12_device_SetName, + /* ID3D12Device methods */ + d3d12_device_GetNodeCount, + d3d12_device_CreateCommandQueue, + d3d12_device_CreateCommandAllocator, + d3d12_device_CreateGraphicsPipelineState, + d3d12_device_CreateComputePipelineState, + d3d12_device_CreateCommandList, + d3d12_device_CheckFeatureSupport, + d3d12_device_CreateDescriptorHeap, + d3d12_device_GetDescriptorHandleIncrementSize, + d3d12_device_CreateRootSignature, + d3d12_device_CreateConstantBufferView, + d3d12_device_CreateShaderResourceView, + d3d12_device_CreateUnorderedAccessView, + d3d12_device_CreateRenderTargetView, + d3d12_device_CreateDepthStencilView, + d3d12_device_CreateSampler, + d3d12_device_CopyDescriptors, + d3d12_device_CopyDescriptorsSimple, + d3d12_device_GetResourceAllocationInfo, + d3d12_device_GetCustomHeapProperties, + d3d12_device_CreateCommittedResource, + d3d12_device_CreateHeap, + d3d12_device_CreatePlacedResource, + d3d12_device_CreateReservedResource, + d3d12_device_CreateSharedHandle, + d3d12_device_OpenSharedHandle, + d3d12_device_OpenSharedHandleByName, + d3d12_device_MakeResident, + d3d12_device_Evict, + d3d12_device_CreateFence, + d3d12_device_GetDeviceRemovedReason, + d3d12_device_GetCopyableFootprints, + d3d12_device_CreateQueryHeap, + d3d12_device_SetStablePowerState, + d3d12_device_CreateCommandSignature, + d3d12_device_GetResourceTiling, + d3d12_device_GetAdapterLuid, +}; + +struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_device_vtbl); + return impl_from_ID3D12Device(iface); +} + +static HRESULT d3d12_device_init(struct d3d12_device *device, + struct vkd3d_instance 
*instance, const struct vkd3d_device_create_info *create_info) +{ + const struct vkd3d_vk_device_procs *vk_procs; + HRESULT hr; + size_t i; + + device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; + device->refcount = 1; + + vkd3d_instance_incref(device->vkd3d_instance = instance); + device->vk_info = instance->vk_info; + device->signal_event = instance->signal_event; + device->wchar_size = instance->wchar_size; + + device->adapter_luid = create_info->adapter_luid; + device->removed_reason = S_OK; + + device->vk_device = VK_NULL_HANDLE; + + if (FAILED(hr = vkd3d_create_vk_device(device, create_info))) + goto out_free_instance; + + if (FAILED(hr = d3d12_device_init_pipeline_cache(device))) + goto out_free_vk_resources; + + if (FAILED(hr = vkd3d_private_store_init(&device->private_store))) + goto out_free_pipeline_cache; + + if (FAILED(hr = vkd3d_fence_worker_start(&device->fence_worker, device))) + goto out_free_private_store; + + if (FAILED(hr = vkd3d_init_format_info(device))) + goto out_stop_fence_worker; + + if (FAILED(hr = vkd3d_init_null_resources(&device->null_resources, device))) + goto out_cleanup_format_info; + + if (FAILED(hr = vkd3d_uav_clear_state_init(&device->uav_clear_state, device))) + goto out_destroy_null_resources; + + vkd3d_render_pass_cache_init(&device->render_pass_cache); + vkd3d_gpu_descriptor_allocator_init(&device->gpu_descriptor_allocator); + vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); + vkd3d_time_domains_init(device); + + for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) + vkd3d_mutex_init(&device->desc_mutex[i]); + + vkd3d_init_descriptor_pool_sizes(device->vk_pool_sizes, &device->vk_info.descriptor_limits); + + if ((device->parent = create_info->parent)) + IUnknown_AddRef(device->parent); + + return S_OK; + +out_destroy_null_resources: + vkd3d_destroy_null_resources(&device->null_resources, device); +out_cleanup_format_info: + vkd3d_cleanup_format_info(device); +out_stop_fence_worker: + 
vkd3d_fence_worker_stop(&device->fence_worker, device); +out_free_private_store: + vkd3d_private_store_destroy(&device->private_store); +out_free_pipeline_cache: + d3d12_device_destroy_pipeline_cache(device); +out_free_vk_resources: + vk_procs = &device->vk_procs; + VK_CALL(vkDestroyDevice(device->vk_device, NULL)); +out_free_instance: + vkd3d_instance_decref(device->vkd3d_instance); + return hr; +} + +HRESULT d3d12_device_create(struct vkd3d_instance *instance, + const struct vkd3d_device_create_info *create_info, struct d3d12_device **device) +{ + struct d3d12_device *object; + HRESULT hr; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_device_init(object, instance, create_info))) + { + vkd3d_free(object); + return hr; + } + + TRACE("Created device %p.\n", object); + + *device = object; + + return S_OK; +} + +void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, + const char *message, ...) +{ + va_list args; + + va_start(args, message); + WARN("Device %p is lost (reason %#x, \"%s\").\n", + device, reason, vkd3d_dbg_vsprintf(message, args)); + va_end(args); + + device->removed_reason = reason; +} + + +#ifdef _WIN32 +struct thread_data +{ + PFN_vkd3d_thread main_pfn; + void *data; +}; + +static DWORD WINAPI call_thread_main(void *data) +{ + struct thread_data *thread_data = data; + thread_data->main_pfn(thread_data->data); + vkd3d_free(thread_data); + return 0; +} +#endif + +HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, + PFN_vkd3d_thread thread_main, void *data, union vkd3d_thread_handle *thread) +{ + HRESULT hr = S_OK; + int rc; + + if (instance->create_thread) + { + if (!(thread->handle = instance->create_thread(thread_main, data))) + { + ERR("Failed to create thread.\n"); + hr = E_FAIL; + } + } + else + { +#ifdef _WIN32 + struct thread_data *thread_data; + + if (!(thread_data = vkd3d_malloc(sizeof(*thread_data)))) + return E_OUTOFMEMORY; + + thread_data->main_pfn = 
thread_main; + thread_data->data = data; + if (!(thread->handle = CreateThread(NULL, 0, call_thread_main, thread_data, 0, NULL))) + { + ERR("Failed to create thread, error %d.\n", GetLastError()); + vkd3d_free(thread_data); + hr = E_FAIL; + } +#else + if ((rc = pthread_create(&thread->pthread, NULL, thread_main, data))) + { + ERR("Failed to create thread, error %d.\n", rc); + hr = hresult_from_errno(rc); + } +#endif + } + + return hr; +} + +HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_handle *thread) +{ + HRESULT hr = S_OK; + int rc; + + if (instance->join_thread) + { + if (FAILED(hr = instance->join_thread(thread->handle))) + ERR("Failed to join thread, hr %#x.\n", hr); + } + else + { +#ifdef _WIN32 + if ((rc = WaitForSingleObject(thread->handle, INFINITE)) != WAIT_OBJECT_0) + { + ERR("Failed to wait for thread, ret %#x.\n", rc); + hr = E_FAIL; + } + CloseHandle(thread->handle); +#else + if ((rc = pthread_join(thread->pthread, NULL))) + { + ERR("Failed to join thread, error %d.\n", rc); + hr = hresult_from_errno(rc); + } +#endif + } + + return hr; +} + +IUnknown *vkd3d_get_device_parent(ID3D12Device *device) +{ + struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + + return d3d12_device->parent; +} + +VkDevice vkd3d_get_vk_device(ID3D12Device *device) +{ + struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + + return d3d12_device->vk_device; +} + +VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) +{ + struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + + return d3d12_device->vk_physical_device; +} + +struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) +{ + struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + + return d3d12_device->vkd3d_instance; +} diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c new file mode 100644 index 00000000000..4c48e22e194 --- /dev/null +++ 
b/libs/vkd3d/libs/vkd3d/resource.c @@ -0,0 +1,4201 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * Copyright 2019 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_private.h" + +#define VKD3D_NULL_BUFFER_SIZE 16 +#define VKD3D_NULL_VIEW_FORMAT DXGI_FORMAT_R8G8B8A8_UNORM + +static inline bool is_cpu_accessible_heap(const D3D12_HEAP_PROPERTIES *properties) +{ + if (properties->Type == D3D12_HEAP_TYPE_DEFAULT) + return false; + if (properties->Type == D3D12_HEAP_TYPE_CUSTOM) + { + return properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + || properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; + } + return true; +} + +static HRESULT vkd3d_select_memory_type(struct d3d12_device *device, uint32_t memory_type_mask, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, unsigned int *type_index) +{ + const VkPhysicalDeviceMemoryProperties *memory_info = &device->memory_properties; + VkMemoryPropertyFlags flags[3]; + unsigned int i, j, count = 0; + + switch (heap_properties->Type) + { + case D3D12_HEAP_TYPE_DEFAULT: + flags[count++] = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + + case D3D12_HEAP_TYPE_UPLOAD: + flags[count++] = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT 
| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + break; + + case D3D12_HEAP_TYPE_READBACK: + flags[count++] = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + flags[count++] = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + break; + + case D3D12_HEAP_TYPE_CUSTOM: + if (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_UNKNOWN + || (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_L1 + && (is_cpu_accessible_heap(heap_properties) || d3d12_device_is_uma(device, NULL)))) + { + WARN("Invalid memory pool preference.\n"); + return E_INVALIDARG; + } + + switch (heap_properties->CPUPageProperty) + { + case D3D12_CPU_PAGE_PROPERTY_WRITE_BACK: + flags[count++] = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + /* Fall through. */ + case D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE: + flags[count++] = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + /* Fall through. 
*/ + case D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE: + flags[count++] = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + case D3D12_CPU_PAGE_PROPERTY_UNKNOWN: + default: + WARN("Invalid CPU page property.\n"); + return E_INVALIDARG; + } + break; + + default: + WARN("Invalid heap type %#x.\n", heap_properties->Type); + return E_INVALIDARG; + } + + for (j = 0; j < count; ++j) + { + VkMemoryPropertyFlags preferred_flags = flags[j]; + + for (i = 0; i < memory_info->memoryTypeCount; ++i) + { + if (!(memory_type_mask & (1u << i))) + continue; + if ((memory_info->memoryTypes[i].propertyFlags & preferred_flags) == preferred_flags) + { + *type_index = i; + return S_OK; + } + } + } + + return E_FAIL; +} + +static HRESULT vkd3d_allocate_device_memory(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const VkMemoryRequirements *memory_requirements, + const VkMemoryDedicatedAllocateInfo *dedicated_allocate_info, + VkDeviceMemory *vk_memory, uint32_t *vk_memory_type) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkMemoryAllocateInfo allocate_info; + VkResult vr; + HRESULT hr; + + TRACE("Memory requirements: size %#"PRIx64", alignment %#"PRIx64".\n", + memory_requirements->size, memory_requirements->alignment); + + allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocate_info.pNext = dedicated_allocate_info; + allocate_info.allocationSize = memory_requirements->size; + if (FAILED(hr = vkd3d_select_memory_type(device, memory_requirements->memoryTypeBits, + heap_properties, heap_flags, &allocate_info.memoryTypeIndex))) + { + if (hr != E_INVALIDARG) + FIXME("Failed to find suitable memory type (allowed types %#x).\n", memory_requirements->memoryTypeBits); + *vk_memory = VK_NULL_HANDLE; + return hr; + } + + TRACE("Allocating memory type %u.\n", allocate_info.memoryTypeIndex); + + if ((vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, vk_memory))) < 0) + { + 
WARN("Failed to allocate device memory, vr %d.\n", vr); + *vk_memory = VK_NULL_HANDLE; + return hresult_from_vk_result(vr); + } + + if (vk_memory_type) + *vk_memory_type = allocate_info.memoryTypeIndex; + + return S_OK; +} + +HRESULT vkd3d_allocate_buffer_memory(struct d3d12_device *device, VkBuffer vk_buffer, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + VkDeviceMemory *vk_memory, uint32_t *vk_memory_type, VkDeviceSize *vk_memory_size) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkMemoryDedicatedAllocateInfo *dedicated_allocation = NULL; + VkMemoryDedicatedRequirements dedicated_requirements; + VkMemoryDedicatedAllocateInfo dedicated_info; + VkMemoryRequirements2 memory_requirements2; + VkMemoryRequirements *memory_requirements; + VkBufferMemoryRequirementsInfo2 info; + VkResult vr; + HRESULT hr; + + memory_requirements = &memory_requirements2.memoryRequirements; + + if (device->vk_info.KHR_dedicated_allocation) + { + info.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2; + info.pNext = NULL; + info.buffer = vk_buffer; + + dedicated_requirements.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS; + dedicated_requirements.pNext = NULL; + + memory_requirements2.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + memory_requirements2.pNext = &dedicated_requirements; + + VK_CALL(vkGetBufferMemoryRequirements2KHR(device->vk_device, &info, &memory_requirements2)); + + if (dedicated_requirements.prefersDedicatedAllocation) + { + dedicated_allocation = &dedicated_info; + + dedicated_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + dedicated_info.pNext = NULL; + dedicated_info.image = VK_NULL_HANDLE; + dedicated_info.buffer = vk_buffer; + } + } + else + { + VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, vk_buffer, memory_requirements)); + } + + if (FAILED(hr = vkd3d_allocate_device_memory(device, heap_properties, heap_flags, + memory_requirements, dedicated_allocation, 
vk_memory, vk_memory_type))) + return hr; + + if ((vr = VK_CALL(vkBindBufferMemory(device->vk_device, vk_buffer, *vk_memory, 0))) < 0) + { + WARN("Failed to bind memory, vr %d.\n", vr); + VK_CALL(vkFreeMemory(device->vk_device, *vk_memory, NULL)); + *vk_memory = VK_NULL_HANDLE; + } + + if (vk_memory_size) + *vk_memory_size = memory_requirements->size; + + return hresult_from_vk_result(vr); +} + +static HRESULT vkd3d_allocate_image_memory(struct d3d12_device *device, VkImage vk_image, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + VkDeviceMemory *vk_memory, uint32_t *vk_memory_type, VkDeviceSize *vk_memory_size) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkMemoryDedicatedAllocateInfo *dedicated_allocation = NULL; + VkMemoryDedicatedRequirements dedicated_requirements; + VkMemoryDedicatedAllocateInfo dedicated_info; + VkMemoryRequirements2 memory_requirements2; + VkMemoryRequirements *memory_requirements; + VkImageMemoryRequirementsInfo2 info; + VkResult vr; + HRESULT hr; + + memory_requirements = &memory_requirements2.memoryRequirements; + + if (device->vk_info.KHR_dedicated_allocation) + { + info.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2; + info.pNext = NULL; + info.image = vk_image; + + dedicated_requirements.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS; + dedicated_requirements.pNext = NULL; + + memory_requirements2.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + memory_requirements2.pNext = &dedicated_requirements; + + VK_CALL(vkGetImageMemoryRequirements2KHR(device->vk_device, &info, &memory_requirements2)); + + if (dedicated_requirements.prefersDedicatedAllocation) + { + dedicated_allocation = &dedicated_info; + + dedicated_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + dedicated_info.pNext = NULL; + dedicated_info.image = vk_image; + dedicated_info.buffer = VK_NULL_HANDLE; + } + } + else + { + VK_CALL(vkGetImageMemoryRequirements(device->vk_device, 
vk_image, memory_requirements)); + } + + if (FAILED(hr = vkd3d_allocate_device_memory(device, heap_properties, heap_flags, + memory_requirements, dedicated_allocation, vk_memory, vk_memory_type))) + return hr; + + if ((vr = VK_CALL(vkBindImageMemory(device->vk_device, vk_image, *vk_memory, 0))) < 0) + { + WARN("Failed to bind memory, vr %d.\n", vr); + VK_CALL(vkFreeMemory(device->vk_device, *vk_memory, NULL)); + *vk_memory = VK_NULL_HANDLE; + return hresult_from_vk_result(vr); + } + + if (vk_memory_size) + *vk_memory_size = memory_requirements->size; + + return S_OK; +} + +/* ID3D12Heap */ +static inline struct d3d12_heap *impl_from_ID3D12Heap(ID3D12Heap *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_heap, ID3D12Heap_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_heap_QueryInterface(ID3D12Heap *iface, + REFIID iid, void **object) +{ + TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); + + if (IsEqualGUID(iid, &IID_ID3D12Heap) + || IsEqualGUID(iid, &IID_ID3D12Pageable) + || IsEqualGUID(iid, &IID_ID3D12DeviceChild) + || IsEqualGUID(iid, &IID_ID3D12Object) + || IsEqualGUID(iid, &IID_IUnknown)) + { + ID3D12Heap_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(ID3D12Heap *iface) +{ + struct d3d12_heap *heap = impl_from_ID3D12Heap(iface); + ULONG refcount = InterlockedIncrement(&heap->refcount); + + TRACE("%p increasing refcount to %u.\n", heap, refcount); + + assert(!heap->is_private); + + return refcount; +} + +static void d3d12_heap_destroy(struct d3d12_heap *heap) +{ + struct d3d12_device *device = heap->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + TRACE("Destroying heap %p.\n", heap); + + vkd3d_private_store_destroy(&heap->private_store); + + VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL)); + + 
vkd3d_mutex_destroy(&heap->mutex); + + if (heap->is_private) + device = NULL; + + vkd3d_free(heap); + + if (device) + d3d12_device_release(device); +} + +static ULONG STDMETHODCALLTYPE d3d12_heap_Release(ID3D12Heap *iface) +{ + struct d3d12_heap *heap = impl_from_ID3D12Heap(iface); + ULONG refcount = InterlockedDecrement(&heap->refcount); + + TRACE("%p decreasing refcount to %u.\n", heap, refcount); + + if (!refcount) + d3d12_heap_destroy(heap); + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(ID3D12Heap *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_heap *heap = impl_from_ID3D12Heap(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&heap->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateData(ID3D12Heap *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_heap *heap = impl_from_ID3D12Heap(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&heap->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateDataInterface(ID3D12Heap *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_heap *heap = impl_from_ID3D12Heap(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&heap->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_heap_SetName(ID3D12Heap *iface, const WCHAR *name) +{ + struct d3d12_heap *heap = impl_from_ID3D12Heap(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, heap->device->wchar_size)); + + return vkd3d_set_vk_object_name(heap->device, (uint64_t)heap->vk_memory, + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, name); +} + +static HRESULT STDMETHODCALLTYPE 
d3d12_heap_GetDevice(ID3D12Heap *iface, REFIID iid, void **device) +{ + struct d3d12_heap *heap = impl_from_ID3D12Heap(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(heap->device, iid, device); +} + +static D3D12_HEAP_DESC * STDMETHODCALLTYPE d3d12_heap_GetDesc(ID3D12Heap *iface, + D3D12_HEAP_DESC *desc) +{ + struct d3d12_heap *heap = impl_from_ID3D12Heap(iface); + + TRACE("iface %p, desc %p.\n", iface, desc); + + *desc = heap->desc; + return desc; +} + +static const struct ID3D12HeapVtbl d3d12_heap_vtbl = +{ + /* IUnknown methods */ + d3d12_heap_QueryInterface, + d3d12_heap_AddRef, + d3d12_heap_Release, + /* ID3D12Object methods */ + d3d12_heap_GetPrivateData, + d3d12_heap_SetPrivateData, + d3d12_heap_SetPrivateDataInterface, + d3d12_heap_SetName, + /* ID3D12DeviceChild methods */ + d3d12_heap_GetDevice, + /* ID3D12Heap methods */ + d3d12_heap_GetDesc, +}; + +struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_heap_vtbl); + return impl_from_ID3D12Heap(iface); +} + +static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, + struct d3d12_resource *resource, void **data) +{ + struct d3d12_device *device = heap->device; + HRESULT hr = S_OK; + VkResult vr; + int rc; + + if ((rc = vkd3d_mutex_lock(&heap->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + if (data) + *data = NULL; + return hresult_from_errno(rc); + } + + assert(!resource->map_count || heap->map_ptr); + + if (!resource->map_count) + { + if (!heap->map_ptr) + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + TRACE("Mapping heap %p.\n", heap); + + assert(!heap->map_count); + + if ((vr = VK_CALL(vkMapMemory(device->vk_device, heap->vk_memory, + 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) + { + WARN("Failed to map device memory, vr %d.\n", vr); + heap->map_ptr = NULL; + } + + hr = 
hresult_from_vk_result(vr); + } + + if (heap->map_ptr) + ++heap->map_count; + } + + if (hr == S_OK) + { + assert(heap->map_ptr); + if (data) + *data = (BYTE *)heap->map_ptr + offset; + ++resource->map_count; + } + else + { + assert(!heap->map_ptr); + if (data) + *data = NULL; + } + + vkd3d_mutex_unlock(&heap->mutex); + + return hr; +} + +static void d3d12_heap_unmap(struct d3d12_heap *heap, struct d3d12_resource *resource) +{ + struct d3d12_device *device = heap->device; + int rc; + + if ((rc = vkd3d_mutex_lock(&heap->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return; + } + + if (!resource->map_count) + { + WARN("Resource %p is not mapped.\n", resource); + goto done; + } + + --resource->map_count; + if (resource->map_count) + goto done; + + if (!heap->map_count) + { + ERR("Heap %p is not mapped.\n", heap); + goto done; + } + + --heap->map_count; + if (!heap->map_count) + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + TRACE("Unmapping heap %p, ptr %p.\n", heap, heap->map_ptr); + + VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); + heap->map_ptr = NULL; + } + +done: + vkd3d_mutex_unlock(&heap->mutex); +} + +static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) +{ + if (!resource && !desc->SizeInBytes) + { + WARN("Invalid size %"PRIu64".\n", desc->SizeInBytes); + return E_INVALIDARG; + } + + if (desc->Alignment != D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT + && desc->Alignment != D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT) + { + WARN("Invalid alignment %"PRIu64".\n", desc->Alignment); + return E_INVALIDARG; + } + + if (!resource && desc->Flags & D3D12_HEAP_FLAG_ALLOW_DISPLAY) + { + WARN("D3D12_HEAP_FLAG_ALLOW_DISPLAY is only for committed resources.\n"); + return E_INVALIDARG; + } + + return S_OK; +} + +static HRESULT d3d12_heap_init(struct d3d12_heap *heap, + struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) +{ 
+ VkMemoryRequirements memory_requirements; + VkDeviceSize vk_memory_size; + HRESULT hr; + int rc; + + heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl; + heap->refcount = 1; + + heap->is_private = !!resource; + + heap->desc = *desc; + + heap->map_ptr = NULL; + heap->map_count = 0; + + if (!heap->desc.Properties.CreationNodeMask) + heap->desc.Properties.CreationNodeMask = 1; + if (!heap->desc.Properties.VisibleNodeMask) + heap->desc.Properties.VisibleNodeMask = 1; + + debug_ignored_node_mask(heap->desc.Properties.CreationNodeMask); + debug_ignored_node_mask(heap->desc.Properties.VisibleNodeMask); + + if (!heap->desc.Alignment) + heap->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + + if (FAILED(hr = validate_heap_desc(&heap->desc, resource))) + return hr; + + if ((rc = vkd3d_mutex_init(&heap->mutex))) + { + ERR("Failed to initialize mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if (FAILED(hr = vkd3d_private_store_init(&heap->private_store))) + { + vkd3d_mutex_destroy(&heap->mutex); + return hr; + } + + if (resource) + { + if (d3d12_resource_is_buffer(resource)) + { + hr = vkd3d_allocate_buffer_memory(device, resource->u.vk_buffer, + &heap->desc.Properties, heap->desc.Flags, + &heap->vk_memory, &heap->vk_memory_type, &vk_memory_size); + } + else + { + hr = vkd3d_allocate_image_memory(device, resource->u.vk_image, + &heap->desc.Properties, heap->desc.Flags, + &heap->vk_memory, &heap->vk_memory_type, &vk_memory_size); + } + + heap->desc.SizeInBytes = vk_memory_size; + } + else + { + memory_requirements.size = heap->desc.SizeInBytes; + memory_requirements.alignment = heap->desc.Alignment; + memory_requirements.memoryTypeBits = ~(uint32_t)0; + + hr = vkd3d_allocate_device_memory(device, &heap->desc.Properties, + heap->desc.Flags, &memory_requirements, NULL, + &heap->vk_memory, &heap->vk_memory_type); + } + if (FAILED(hr)) + { + vkd3d_private_store_destroy(&heap->private_store); + vkd3d_mutex_destroy(&heap->mutex); + return hr; + } + + 
heap->device = device; + if (!heap->is_private) + d3d12_device_add_ref(heap->device); + + return S_OK; +} + +HRESULT d3d12_heap_create(struct d3d12_device *device, const D3D12_HEAP_DESC *desc, + const struct d3d12_resource *resource, struct d3d12_heap **heap) +{ + struct d3d12_heap *object; + HRESULT hr; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_heap_init(object, device, desc, resource))) + { + vkd3d_free(object); + return hr; + } + + TRACE("Created %s %p.\n", object->is_private ? "private heap" : "heap", object); + + *heap = object; + + return S_OK; +} + +static VkImageType vk_image_type_from_d3d12_resource_dimension(D3D12_RESOURCE_DIMENSION dimension) +{ + switch (dimension) + { + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: + return VK_IMAGE_TYPE_1D; + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + return VK_IMAGE_TYPE_2D; + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: + return VK_IMAGE_TYPE_3D; + default: + ERR("Invalid resource dimension %#x.\n", dimension); + return VK_IMAGE_TYPE_2D; + } +} + +VkSampleCountFlagBits vk_samples_from_sample_count(unsigned int sample_count) +{ + switch (sample_count) + { + case 1: + return VK_SAMPLE_COUNT_1_BIT; + case 2: + return VK_SAMPLE_COUNT_2_BIT; + case 4: + return VK_SAMPLE_COUNT_4_BIT; + case 8: + return VK_SAMPLE_COUNT_8_BIT; + case 16: + return VK_SAMPLE_COUNT_16_BIT; + case 32: + return VK_SAMPLE_COUNT_32_BIT; + case 64: + return VK_SAMPLE_COUNT_64_BIT; + default: + return 0; + } +} + +VkSampleCountFlagBits vk_samples_from_dxgi_sample_desc(const DXGI_SAMPLE_DESC *desc) +{ + VkSampleCountFlagBits vk_samples; + + if ((vk_samples = vk_samples_from_sample_count(desc->Count))) + return vk_samples; + + FIXME("Unhandled sample count %u.\n", desc->Count); + return VK_SAMPLE_COUNT_1_BIT; +} + +HRESULT vkd3d_create_buffer(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, VkBuffer *vk_buffer) 
+{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + const bool sparse_resource = !heap_properties; + VkBufferCreateInfo buffer_info; + D3D12_HEAP_TYPE heap_type; + VkResult vr; + + heap_type = heap_properties ? heap_properties->Type : D3D12_HEAP_TYPE_DEFAULT; + + buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_info.pNext = NULL; + buffer_info.flags = 0; + buffer_info.size = desc->Width; + + if (sparse_resource) + { + buffer_info.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT; + if (device->vk_info.sparse_properties.residencyNonResidentStrict) + buffer_info.flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT; + } + + buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT + | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT + | VK_BUFFER_USAGE_INDEX_BUFFER_BIT + | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT + | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; + + if (device->vk_info.EXT_conditional_rendering) + buffer_info.usage |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; + + if (heap_type == D3D12_HEAP_TYPE_DEFAULT && device->vk_info.EXT_transform_feedback) + { + buffer_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT + | VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT; + } + + if (heap_type == D3D12_HEAP_TYPE_UPLOAD) + buffer_info.usage &= ~VK_BUFFER_USAGE_TRANSFER_DST_BIT; + else if (heap_type == D3D12_HEAP_TYPE_READBACK) + buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + + if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) + buffer_info.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + if (!(desc->Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) + buffer_info.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; + + /* Buffers always have properties of D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS. 
*/ + if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) + { + WARN("D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS cannot be set for buffers.\n"); + return E_INVALIDARG; + } + + if (device->queue_family_count > 1) + { + buffer_info.sharingMode = VK_SHARING_MODE_CONCURRENT; + buffer_info.queueFamilyIndexCount = device->queue_family_count; + buffer_info.pQueueFamilyIndices = device->queue_family_indices; + } + else + { + buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_info.queueFamilyIndexCount = 0; + buffer_info.pQueueFamilyIndices = NULL; + } + + if (desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + FIXME("Unsupported resource flags %#x.\n", desc->Flags); + + if ((vr = VK_CALL(vkCreateBuffer(device->vk_device, &buffer_info, NULL, vk_buffer))) < 0) + { + WARN("Failed to create Vulkan buffer, vr %d.\n", vr); + *vk_buffer = VK_NULL_HANDLE; + } + + return hresult_from_vk_result(vr); +} + +static unsigned int max_miplevel_count(const D3D12_RESOURCE_DESC *desc) +{ + unsigned int size = max(desc->Width, desc->Height); + size = max(size, d3d12_resource_desc_get_depth(desc, 0)); + return vkd3d_log2i(size) + 1; +} + +static const struct vkd3d_format_compatibility_list *vkd3d_get_format_compatibility_list( + const struct d3d12_device *device, DXGI_FORMAT dxgi_format) +{ + unsigned int i; + + for (i = 0; i < device->format_compatibility_list_count; ++i) + { + if (device->format_compatibility_lists[i].typeless_format == dxgi_format) + return &device->format_compatibility_lists[i]; + } + + return NULL; +} + +static bool vkd3d_is_linear_tiling_supported(const struct d3d12_device *device, VkImageCreateInfo *image_info) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkImageFormatProperties properties; + VkResult vr; + + if ((vr = VK_CALL(vkGetPhysicalDeviceImageFormatProperties(device->vk_physical_device, image_info->format, + image_info->imageType, VK_IMAGE_TILING_LINEAR, 
image_info->usage, image_info->flags, &properties))) < 0) + { + if (vr != VK_ERROR_FORMAT_NOT_SUPPORTED) + WARN("Failed to get device image format properties, vr %d.\n", vr); + + return false; + } + + return image_info->extent.depth <= properties.maxExtent.depth + && image_info->mipLevels <= properties.maxMipLevels + && image_info->arrayLayers <= properties.maxArrayLayers + && (image_info->samples & properties.sampleCounts); +} + +static HRESULT vkd3d_create_image(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, struct d3d12_resource *resource, VkImage *vk_image) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + const struct vkd3d_format_compatibility_list *compat_list; + const bool sparse_resource = !heap_properties; + VkImageFormatListCreateInfoKHR format_list; + const struct vkd3d_format *format; + VkImageCreateInfo image_info; + VkResult vr; + + if (resource) + { + format = resource->format; + } + else if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) + { + WARN("Invalid DXGI format %#x.\n", desc->Format); + return E_INVALIDARG; + } + + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.pNext = NULL; + image_info.flags = 0; + if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) + { + /* Format compatibility rules are more relaxed for UAVs. 
*/ + if (format->type != VKD3D_FORMAT_TYPE_UINT) + image_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + } + else if (!(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) && format->type == VKD3D_FORMAT_TYPE_TYPELESS) + { + image_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + + if ((compat_list = vkd3d_get_format_compatibility_list(device, desc->Format))) + { + format_list.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR; + format_list.pNext = NULL; + format_list.viewFormatCount = compat_list->format_count; + format_list.pViewFormats = compat_list->vk_formats; + + image_info.pNext = &format_list; + } + } + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D + && desc->Width == desc->Height && desc->DepthOrArraySize >= 6 + && desc->SampleDesc.Count == 1) + image_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) + image_info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR; + + if (sparse_resource) + { + image_info.flags |= VK_IMAGE_CREATE_SPARSE_BINDING_BIT; + if (device->vk_info.sparse_properties.residencyNonResidentStrict) + image_info.flags |= VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT; + } + + image_info.imageType = vk_image_type_from_d3d12_resource_dimension(desc->Dimension); + image_info.format = format->vk_format; + image_info.extent.width = desc->Width; + image_info.extent.height = desc->Height; + + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) + { + image_info.extent.depth = desc->DepthOrArraySize; + image_info.arrayLayers = 1; + } + else + { + image_info.extent.depth = 1; + image_info.arrayLayers = desc->DepthOrArraySize; + } + + image_info.mipLevels = min(desc->MipLevels, max_miplevel_count(desc)); + image_info.samples = vk_samples_from_dxgi_sample_desc(&desc->SampleDesc); + + if (sparse_resource) + { + if (desc->Layout != D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) + { + WARN("D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE must be used for reserved 
texture.\n"); + return E_INVALIDARG; + } + + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + } + else if (desc->Layout == D3D12_TEXTURE_LAYOUT_UNKNOWN) + { + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + } + else if (desc->Layout == D3D12_TEXTURE_LAYOUT_ROW_MAJOR) + { + image_info.tiling = VK_IMAGE_TILING_LINEAR; + } + else + { + FIXME("Unsupported layout %#x.\n", desc->Layout); + return E_NOTIMPL; + } + + image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) + image_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) + image_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) + image_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + if (!(desc->Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) + image_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + + if ((desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) && device->queue_family_count > 1) + { + TRACE("Creating image with VK_SHARING_MODE_CONCURRENT.\n"); + image_info.sharingMode = VK_SHARING_MODE_CONCURRENT; + image_info.queueFamilyIndexCount = device->queue_family_count; + image_info.pQueueFamilyIndices = device->queue_family_indices; + } + else + { + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.queueFamilyIndexCount = 0; + image_info.pQueueFamilyIndices = NULL; + } + + if (heap_properties && is_cpu_accessible_heap(heap_properties)) + { + image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + + if (vkd3d_is_linear_tiling_supported(device, &image_info)) + { + /* Required for ReadFromSubresource(). 
*/ + WARN("Forcing VK_IMAGE_TILING_LINEAR for CPU readable texture.\n"); + image_info.tiling = VK_IMAGE_TILING_LINEAR; + } + } + else + { + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + } + + if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) + resource->flags |= VKD3D_RESOURCE_LINEAR_TILING; + + if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) + WARN("Failed to create Vulkan image, vr %d.\n", vr); + + return hresult_from_vk_result(vr); +} + +HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_ALLOCATION_INFO *allocation_info) +{ + static const D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_DEFAULT}; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + D3D12_RESOURCE_DESC validated_desc; + VkMemoryRequirements requirements; + VkImage vk_image; + HRESULT hr; + + assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); + assert(d3d12_resource_validate_desc(desc, device) == S_OK); + + if (!desc->MipLevels) + { + validated_desc = *desc; + validated_desc.MipLevels = max_miplevel_count(desc); + desc = &validated_desc; + } + + /* XXX: We have to create an image to get its memory requirements. 
*/ + if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) + { + VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); + VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); + + allocation_info->SizeInBytes = requirements.size; + allocation_info->Alignment = requirements.alignment; + } + + return hr; +} + +static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + if (resource->flags & VKD3D_RESOURCE_EXTERNAL) + return; + + if (resource->gpu_address) + vkd3d_gpu_va_allocator_free(&device->gpu_va_allocator, resource->gpu_address); + + if (d3d12_resource_is_buffer(resource)) + VK_CALL(vkDestroyBuffer(device->vk_device, resource->u.vk_buffer, NULL)); + else + VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL)); + + if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP) + d3d12_heap_destroy(resource->heap); +} + +static ULONG d3d12_resource_incref(struct d3d12_resource *resource) +{ + ULONG refcount = InterlockedIncrement(&resource->internal_refcount); + + TRACE("%p increasing refcount to %u.\n", resource, refcount); + + return refcount; +} + +static ULONG d3d12_resource_decref(struct d3d12_resource *resource) +{ + ULONG refcount = InterlockedDecrement(&resource->internal_refcount); + + TRACE("%p decreasing refcount to %u.\n", resource, refcount); + + if (!refcount) + { + vkd3d_private_store_destroy(&resource->private_store); + d3d12_resource_destroy(resource, resource->device); + vkd3d_free(resource); + } + + return refcount; +} + +bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource) +{ + return resource->heap && is_cpu_accessible_heap(&resource->heap->desc.Properties); +} + +static bool d3d12_resource_validate_box(const struct d3d12_resource *resource, + unsigned int sub_resource_idx, const D3D12_BOX *box) +{ + unsigned int mip_level = 
sub_resource_idx % resource->desc.MipLevels; + const struct vkd3d_format *vkd3d_format; + uint32_t width_mask, height_mask; + uint64_t width, height, depth; + + width = d3d12_resource_desc_get_width(&resource->desc, mip_level); + height = d3d12_resource_desc_get_height(&resource->desc, mip_level); + depth = d3d12_resource_desc_get_depth(&resource->desc, mip_level); + + vkd3d_format = resource->format; + assert(vkd3d_format); + width_mask = vkd3d_format->block_width - 1; + height_mask = vkd3d_format->block_height - 1; + + return box->left <= width && box->right <= width + && box->top <= height && box->bottom <= height + && box->front <= depth && box->back <= depth + && !(box->left & width_mask) + && !(box->right & width_mask) + && !(box->top & height_mask) + && !(box->bottom & height_mask); +} + +static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, + unsigned int level, D3D12_BOX *box) +{ + box->left = 0; + box->top = 0; + box->front = 0; + box->right = d3d12_resource_desc_get_width(&resource->desc, level); + box->bottom = d3d12_resource_desc_get_height(&resource->desc, level); + box->back = d3d12_resource_desc_get_depth(&resource->desc, level); +} + +/* ID3D12Resource */ +static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, + REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + if (IsEqualGUID(riid, &IID_ID3D12Resource) + || IsEqualGUID(riid, &IID_ID3D12Pageable) + || IsEqualGUID(riid, &IID_ID3D12DeviceChild) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { + ID3D12Resource_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; + return 
E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_resource_AddRef(ID3D12Resource *iface) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + ULONG refcount = InterlockedIncrement(&resource->refcount); + + TRACE("%p increasing refcount to %u.\n", resource, refcount); + + if (refcount == 1) + { + struct d3d12_device *device = resource->device; + + d3d12_device_add_ref(device); + d3d12_resource_incref(resource); + } + + return refcount; +} + +static ULONG STDMETHODCALLTYPE d3d12_resource_Release(ID3D12Resource *iface) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + ULONG refcount = InterlockedDecrement(&resource->refcount); + + TRACE("%p decreasing refcount to %u.\n", resource, refcount); + + if (!refcount) + { + struct d3d12_device *device = resource->device; + + d3d12_resource_decref(resource); + + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_GetPrivateData(ID3D12Resource *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&resource->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_SetPrivateData(ID3D12Resource *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&resource->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_SetPrivateDataInterface(ID3D12Resource *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), 
data); + + return vkd3d_set_private_data_interface(&resource->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_SetName(ID3D12Resource *iface, const WCHAR *name) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + HRESULT hr; + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, resource->device->wchar_size)); + + if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP) + { + if (FAILED(hr = d3d12_heap_SetName(&resource->heap->ID3D12Heap_iface, name))) + return hr; + } + + if (d3d12_resource_is_buffer(resource)) + return vkd3d_set_vk_object_name(resource->device, (uint64_t)resource->u.vk_buffer, + VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, name); + else + return vkd3d_set_vk_object_name(resource->device, (uint64_t)resource->u.vk_image, + VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, name); +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource *iface, REFIID iid, void **device) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(resource->device, iid, device); +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT sub_resource, + const D3D12_RANGE *read_range, void **data) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + unsigned int sub_resource_count; + HRESULT hr; + + TRACE("iface %p, sub_resource %u, read_range %p, data %p.\n", + iface, sub_resource, read_range, data); + + if (!d3d12_resource_is_cpu_accessible(resource)) + { + WARN("Resource is not CPU accessible.\n"); + return E_INVALIDARG; + } + + sub_resource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); + if (sub_resource >= sub_resource_count) + { + WARN("Sub-resource index %u is out of range (%u sub-resources).\n", sub_resource, sub_resource_count); + return E_INVALIDARG; + } + + if (d3d12_resource_is_texture(resource)) + { 
+ /* Textures seem to be mappable only on UMA adapters. */ + FIXME("Not implemented for textures.\n"); + return E_INVALIDARG; + } + + if (!resource->heap) + { + FIXME("Not implemented for this resource type.\n"); + return E_NOTIMPL; + } + + WARN("Ignoring read range %p.\n", read_range); + + if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, data))) + WARN("Failed to map resource %p, hr %#x.\n", resource, hr); + + if (data) + TRACE("Returning pointer %p.\n", *data); + + return hr; +} + +static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT sub_resource, + const D3D12_RANGE *written_range) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + unsigned int sub_resource_count; + + TRACE("iface %p, sub_resource %u, written_range %p.\n", + iface, sub_resource, written_range); + + sub_resource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); + if (sub_resource >= sub_resource_count) + { + WARN("Sub-resource index %u is out of range (%u sub-resources).\n", sub_resource, sub_resource_count); + return; + } + + WARN("Ignoring written range %p.\n", written_range); + + d3d12_heap_unmap(resource->heap, resource); +} + +static D3D12_RESOURCE_DESC * STDMETHODCALLTYPE d3d12_resource_GetDesc(ID3D12Resource *iface, + D3D12_RESOURCE_DESC *resource_desc) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + + TRACE("iface %p, resource_desc %p.\n", iface, resource_desc); + + *resource_desc = resource->desc; + return resource_desc; +} + +static D3D12_GPU_VIRTUAL_ADDRESS STDMETHODCALLTYPE d3d12_resource_GetGPUVirtualAddress(ID3D12Resource *iface) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + + TRACE("iface %p.\n", iface); + + return resource->gpu_address; +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resource *iface, + UINT dst_sub_resource, const D3D12_BOX *dst_box, const void *src_data, + UINT src_row_pitch, UINT 
src_slice_pitch) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + const struct vkd3d_vk_device_procs *vk_procs; + VkImageSubresource vk_sub_resource; + const struct vkd3d_format *format; + VkSubresourceLayout vk_layout; + struct d3d12_device *device; + uint8_t *dst_data; + D3D12_BOX box; + HRESULT hr; + + TRACE("iface %p, src_data %p, src_row_pitch %u, src_slice_pitch %u, " + "dst_sub_resource %u, dst_box %s.\n", + iface, src_data, src_row_pitch, src_slice_pitch, dst_sub_resource, debug_d3d12_box(dst_box)); + + if (d3d12_resource_is_buffer(resource)) + { + WARN("Buffers are not supported.\n"); + return E_INVALIDARG; + } + + device = resource->device; + vk_procs = &device->vk_procs; + + format = resource->format; + if (format->vk_aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT) + { + FIXME("Not supported for format %#x.\n", format->dxgi_format); + return E_NOTIMPL; + } + + vk_sub_resource.arrayLayer = dst_sub_resource / resource->desc.MipLevels; + vk_sub_resource.mipLevel = dst_sub_resource % resource->desc.MipLevels; + vk_sub_resource.aspectMask = format->vk_aspect_mask; + + if (!dst_box) + { + d3d12_resource_get_level_box(resource, vk_sub_resource.mipLevel, &box); + dst_box = &box; + } + else if (!d3d12_resource_validate_box(resource, dst_sub_resource, dst_box)) + { + WARN("Invalid box %s.\n", debug_d3d12_box(dst_box)); + return E_INVALIDARG; + } + + if (d3d12_box_is_empty(dst_box)) + { + WARN("Empty box %s.\n", debug_d3d12_box(dst_box)); + return S_OK; + } + + if (!d3d12_resource_is_cpu_accessible(resource)) + { + FIXME_ONCE("Not implemented for this resource type.\n"); + return E_NOTIMPL; + } + if (!(resource->flags & VKD3D_RESOURCE_LINEAR_TILING)) + { + FIXME_ONCE("Not implemented for image tiling other than VK_IMAGE_TILING_LINEAR.\n"); + return E_NOTIMPL; + } + + VK_CALL(vkGetImageSubresourceLayout(device->vk_device, resource->u.vk_image, &vk_sub_resource, &vk_layout)); + TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth 
pitch %#"PRIx64".\n", + vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); + + if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&dst_data))) + { + WARN("Failed to map resource %p, hr %#x.\n", resource, hr); + return hr; + } + + dst_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, dst_box->left, dst_box->top, dst_box->front); + + vkd3d_format_copy_data(format, src_data, src_row_pitch, src_slice_pitch, + dst_data, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, + dst_box->bottom - dst_box->top, dst_box->back - dst_box->front); + + d3d12_heap_unmap(resource->heap, resource); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resource *iface, + void *dst_data, UINT dst_row_pitch, UINT dst_slice_pitch, + UINT src_sub_resource, const D3D12_BOX *src_box) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + const struct vkd3d_vk_device_procs *vk_procs; + VkImageSubresource vk_sub_resource; + const struct vkd3d_format *format; + VkSubresourceLayout vk_layout; + struct d3d12_device *device; + uint8_t *src_data; + D3D12_BOX box; + HRESULT hr; + + TRACE("iface %p, dst_data %p, dst_row_pitch %u, dst_slice_pitch %u, " + "src_sub_resource %u, src_box %s.\n", + iface, dst_data, dst_row_pitch, dst_slice_pitch, src_sub_resource, debug_d3d12_box(src_box)); + + if (d3d12_resource_is_buffer(resource)) + { + WARN("Buffers are not supported.\n"); + return E_INVALIDARG; + } + + device = resource->device; + vk_procs = &device->vk_procs; + + format = resource->format; + if (format->vk_aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT) + { + FIXME("Not supported for format %#x.\n", format->dxgi_format); + return E_NOTIMPL; + } + + vk_sub_resource.arrayLayer = src_sub_resource / resource->desc.MipLevels; + vk_sub_resource.mipLevel = src_sub_resource % resource->desc.MipLevels; + 
vk_sub_resource.aspectMask = format->vk_aspect_mask; + + if (!src_box) + { + d3d12_resource_get_level_box(resource, vk_sub_resource.mipLevel, &box); + src_box = &box; + } + else if (!d3d12_resource_validate_box(resource, src_sub_resource, src_box)) + { + WARN("Invalid box %s.\n", debug_d3d12_box(src_box)); + return E_INVALIDARG; + } + + if (d3d12_box_is_empty(src_box)) + { + WARN("Empty box %s.\n", debug_d3d12_box(src_box)); + return S_OK; + } + + if (!d3d12_resource_is_cpu_accessible(resource)) + { + FIXME_ONCE("Not implemented for this resource type.\n"); + return E_NOTIMPL; + } + if (!(resource->flags & VKD3D_RESOURCE_LINEAR_TILING)) + { + FIXME_ONCE("Not implemented for image tiling other than VK_IMAGE_TILING_LINEAR.\n"); + return E_NOTIMPL; + } + + VK_CALL(vkGetImageSubresourceLayout(device->vk_device, resource->u.vk_image, &vk_sub_resource, &vk_layout)); + TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", + vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); + + if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&src_data))) + { + WARN("Failed to map resource %p, hr %#x.\n", resource, hr); + return hr; + } + + src_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, src_box->left, src_box->top, src_box->front); + + vkd3d_format_copy_data(format, src_data, vk_layout.rowPitch, vk_layout.depthPitch, + dst_data, dst_row_pitch, dst_slice_pitch, src_box->right - src_box->left, + src_box->bottom - src_box->top, src_box->back - src_box->front); + + d3d12_heap_unmap(resource->heap, resource); + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_resource_GetHeapProperties(ID3D12Resource *iface, + D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS *flags) +{ + struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); + struct d3d12_heap *heap; + + TRACE("iface %p, heap_properties %p, 
flags %p.\n", + iface, heap_properties, flags); + + if (resource->flags & VKD3D_RESOURCE_EXTERNAL) + { + if (heap_properties) + { + memset(heap_properties, 0, sizeof(*heap_properties)); + heap_properties->Type = D3D12_HEAP_TYPE_DEFAULT; + heap_properties->CreationNodeMask = 1; + heap_properties->VisibleNodeMask = 1; + } + if (flags) + *flags = D3D12_HEAP_FLAG_NONE; + return S_OK; + } + + if (!(heap = resource->heap)) + { + WARN("Cannot get heap properties for reserved resources.\n"); + return E_INVALIDARG; + } + + if (heap_properties) + *heap_properties = heap->desc.Properties; + if (flags) + *flags = heap->desc.Flags; + + return S_OK; +} + +static const struct ID3D12ResourceVtbl d3d12_resource_vtbl = +{ + /* IUnknown methods */ + d3d12_resource_QueryInterface, + d3d12_resource_AddRef, + d3d12_resource_Release, + /* ID3D12Object methods */ + d3d12_resource_GetPrivateData, + d3d12_resource_SetPrivateData, + d3d12_resource_SetPrivateDataInterface, + d3d12_resource_SetName, + /* ID3D12DeviceChild methods */ + d3d12_resource_GetDevice, + /* ID3D12Resource methods */ + d3d12_resource_Map, + d3d12_resource_Unmap, + d3d12_resource_GetDesc, + d3d12_resource_GetGPUVirtualAddress, + d3d12_resource_WriteToSubresource, + d3d12_resource_ReadFromSubresource, + d3d12_resource_GetHeapProperties, +}; + +struct d3d12_resource *unsafe_impl_from_ID3D12Resource(ID3D12Resource *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_resource_vtbl); + return impl_from_ID3D12Resource(iface); +} + +static void d3d12_validate_resource_flags(D3D12_RESOURCE_FLAGS flags) +{ + unsigned int unknown_flags = flags & ~(D3D12_RESOURCE_FLAG_NONE + | D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET + | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL + | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS + | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE + | D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER + | D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS); + + if (unknown_flags) + FIXME("Unknown resource flags %#x.\n", 
unknown_flags); + if (flags & D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER) + FIXME("Ignoring D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER.\n"); +} + +static bool d3d12_resource_validate_texture_format(const D3D12_RESOURCE_DESC *desc, + const struct vkd3d_format *format) +{ + if (desc->Format == DXGI_FORMAT_UNKNOWN) + { + WARN("DXGI_FORMAT_UNKNOWN is invalid for textures.\n"); + return false; + } + + if (!vkd3d_format_is_compressed(format)) + return true; + + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE1D && format->block_height > 1) + { + WARN("1D texture with a format block height > 1.\n"); + return false; + } + + if (align(desc->Width, format->block_width) != desc->Width + || align(desc->Height, format->block_height) != desc->Height) + { + WARN("Invalid size %"PRIu64"x%u for block compressed format %#x.\n", + desc->Width, desc->Height, desc->Format); + return false; + } + + return true; +} + +static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC *desc, + const struct vkd3d_format *format) +{ + uint64_t estimated_size; + + if (!desc->Alignment) + return true; + + if (desc->Alignment != D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT + && desc->Alignment != D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT + && (desc->SampleDesc.Count == 1 || desc->Alignment != D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT)) + { + WARN("Invalid resource alignment %#"PRIx64".\n", desc->Alignment); + return false; + } + + if (desc->Alignment < D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT) + { + /* Windows uses the slice size to determine small alignment eligibility. DepthOrArraySize is ignored. 
*/ + estimated_size = desc->Width * desc->Height * format->byte_count * format->block_byte_count + / (format->block_width * format->block_height); + if (estimated_size > D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT) + { + WARN("Invalid resource alignment %#"PRIx64" (required %#x).\n", + desc->Alignment, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); + return false; + } + } + + /* The size check for MSAA textures with D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT is probably + * not important. The 4MB requirement is no longer universal and Vulkan has no such requirement. */ + + return true; +} + +HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d12_device *device) +{ + const struct vkd3d_format *format; + + switch (desc->Dimension) + { + case D3D12_RESOURCE_DIMENSION_BUFFER: + if (desc->MipLevels != 1) + { + WARN("Invalid miplevel count %u for buffer.\n", desc->MipLevels); + return E_INVALIDARG; + } + + if (desc->Format != DXGI_FORMAT_UNKNOWN || desc->Layout != D3D12_TEXTURE_LAYOUT_ROW_MAJOR + || desc->Height != 1 || desc->DepthOrArraySize != 1 + || desc->SampleDesc.Count != 1 || desc->SampleDesc.Quality != 0 + || (desc->Alignment != 0 && desc->Alignment != D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT)) + { + WARN("Invalid parameters for a buffer resource.\n"); + return E_INVALIDARG; + } + break; + + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: + if (desc->Height != 1) + { + WARN("1D texture with a height of %u.\n", desc->Height); + return E_INVALIDARG; + } + /* Fall through. 
*/ + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: + if (!desc->SampleDesc.Count) + { + WARN("Invalid sample count 0.\n"); + return E_INVALIDARG; + } + + if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) + { + WARN("Invalid format %#x.\n", desc->Format); + return E_INVALIDARG; + } + + if (!d3d12_resource_validate_texture_format(desc, format) + || !d3d12_resource_validate_texture_alignment(desc, format)) + return E_INVALIDARG; + break; + + default: + WARN("Invalid resource dimension %#x.\n", desc->Dimension); + return E_INVALIDARG; + } + + d3d12_validate_resource_flags(desc->Flags); + + return S_OK; +} + +static bool d3d12_resource_validate_heap_properties(const struct d3d12_resource *resource, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_RESOURCE_STATES initial_state) +{ + if (heap_properties->Type == D3D12_HEAP_TYPE_UPLOAD + || heap_properties->Type == D3D12_HEAP_TYPE_READBACK) + { + if (d3d12_resource_is_texture(resource)) + { + WARN("Textures cannot be created on upload/readback heaps.\n"); + return false; + } + + if (resource->desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)) + { + WARN("Render target and unordered access buffers cannot be created on upload/readback heaps.\n"); + return false; + } + } + + if (heap_properties->Type == D3D12_HEAP_TYPE_UPLOAD && initial_state != D3D12_RESOURCE_STATE_GENERIC_READ) + { + WARN("For D3D12_HEAP_TYPE_UPLOAD the state must be D3D12_RESOURCE_STATE_GENERIC_READ.\n"); + return false; + } + if (heap_properties->Type == D3D12_HEAP_TYPE_READBACK && initial_state != D3D12_RESOURCE_STATE_COPY_DEST) + { + WARN("For D3D12_HEAP_TYPE_READBACK the state must be D3D12_RESOURCE_STATE_COPY_DEST.\n"); + return false; + } + + return true; +} + +static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + 
const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value) +{ + HRESULT hr; + + resource->ID3D12Resource_iface.lpVtbl = &d3d12_resource_vtbl; + resource->refcount = 1; + resource->internal_refcount = 1; + + resource->desc = *desc; + + if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) + return E_INVALIDARG; + + if (!is_valid_resource_state(initial_state)) + { + WARN("Invalid initial resource state %#x.\n", initial_state); + return E_INVALIDARG; + } + + if (optimized_clear_value && d3d12_resource_is_buffer(resource)) + { + WARN("Optimized clear value must be NULL for buffers.\n"); + return E_INVALIDARG; + } + + if (optimized_clear_value) + WARN("Ignoring optimized clear value.\n"); + + resource->gpu_address = 0; + resource->flags = 0; + + if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device))) + return hr; + + resource->format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0); + + switch (desc->Dimension) + { + case D3D12_RESOURCE_DIMENSION_BUFFER: + if (FAILED(hr = vkd3d_create_buffer(device, heap_properties, heap_flags, + &resource->desc, &resource->u.vk_buffer))) + return hr; + if (!(resource->gpu_address = vkd3d_gpu_va_allocator_allocate(&device->gpu_va_allocator, + desc->Alignment ? 
desc->Alignment : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + desc->Width, resource))) + { + ERR("Failed to allocate GPU VA.\n"); + d3d12_resource_destroy(resource, device); + return E_OUTOFMEMORY; + } + break; + + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: + if (!resource->desc.MipLevels) + resource->desc.MipLevels = max_miplevel_count(desc); + resource->flags |= VKD3D_RESOURCE_INITIAL_STATE_TRANSITION; + if (FAILED(hr = vkd3d_create_image(device, heap_properties, heap_flags, + &resource->desc, resource, &resource->u.vk_image))) + return hr; + break; + + default: + WARN("Invalid resource dimension %#x.\n", resource->desc.Dimension); + return E_INVALIDARG; + } + + resource->map_count = 0; + + resource->initial_state = initial_state; + + resource->heap = NULL; + resource->heap_offset = 0; + + if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) + { + d3d12_resource_destroy(resource, device); + return hr; + } + + d3d12_device_add_ref(resource->device = device); + + return S_OK; +} + +static HRESULT d3d12_resource_create(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource) +{ + struct d3d12_resource *object; + HRESULT hr; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_resource_init(object, device, heap_properties, heap_flags, + desc, initial_state, optimized_clear_value))) + { + vkd3d_free(object); + return hr; + } + + *resource = object; + + return hr; +} + +static HRESULT vkd3d_allocate_resource_memory( + struct d3d12_device *device, struct d3d12_resource *resource, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags) +{ + D3D12_HEAP_DESC heap_desc; + HRESULT hr; + + 
heap_desc.SizeInBytes = 0; + heap_desc.Properties = *heap_properties; + heap_desc.Alignment = 0; + heap_desc.Flags = heap_flags; + if (SUCCEEDED(hr = d3d12_heap_create(device, &heap_desc, resource, &resource->heap))) + resource->flags |= VKD3D_RESOURCE_DEDICATED_HEAP; + return hr; +} + +HRESULT d3d12_committed_resource_create(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource) +{ + struct d3d12_resource *object; + HRESULT hr; + + if (!heap_properties) + { + WARN("Heap properties are NULL.\n"); + return E_INVALIDARG; + } + + if (FAILED(hr = d3d12_resource_create(device, heap_properties, heap_flags, + desc, initial_state, optimized_clear_value, &object))) + return hr; + + if (FAILED(hr = vkd3d_allocate_resource_memory(device, object, heap_properties, heap_flags))) + { + d3d12_resource_Release(&object->ID3D12Resource_iface); + return hr; + } + + TRACE("Created committed resource %p.\n", object); + + *resource = object; + + return S_OK; +} + +static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, + struct d3d12_resource *resource, struct d3d12_heap *heap, uint64_t heap_offset) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDevice vk_device = device->vk_device; + VkMemoryRequirements requirements; + VkResult vr; + + if (d3d12_resource_is_buffer(resource)) + { + VK_CALL(vkGetBufferMemoryRequirements(vk_device, resource->u.vk_buffer, &requirements)); + } + else + { + VK_CALL(vkGetImageMemoryRequirements(vk_device, resource->u.vk_image, &requirements)); + /* Padding in d3d12_device_GetResourceAllocationInfo() leaves room to align the offset. 
*/ + heap_offset = align(heap_offset, requirements.alignment); + } + + if (heap_offset > heap->desc.SizeInBytes || requirements.size > heap->desc.SizeInBytes - heap_offset) + { + ERR("Heap too small for the resource (offset %"PRIu64", resource size %"PRIu64", heap size %"PRIu64".\n", + heap_offset, requirements.size, heap->desc.SizeInBytes); + return E_INVALIDARG; + } + + if (heap_offset % requirements.alignment) + { + FIXME("Invalid heap offset %#"PRIx64" (alignment %#"PRIx64").\n", + heap_offset, requirements.alignment); + goto allocate_memory; + } + + if (!(requirements.memoryTypeBits & (1u << heap->vk_memory_type))) + { + FIXME("Memory type %u cannot be bound to resource %p (allowed types %#x).\n", + heap->vk_memory_type, resource, requirements.memoryTypeBits); + goto allocate_memory; + } + + /* Syncronisation is not required for binding, but vkMapMemory() may be called + * from another thread and it requires exclusive access. */ + vkd3d_mutex_lock(&heap->mutex); + + if (d3d12_resource_is_buffer(resource)) + vr = VK_CALL(vkBindBufferMemory(vk_device, resource->u.vk_buffer, heap->vk_memory, heap_offset)); + else + vr = VK_CALL(vkBindImageMemory(vk_device, resource->u.vk_image, heap->vk_memory, heap_offset)); + + vkd3d_mutex_unlock(&heap->mutex); + + if (vr == VK_SUCCESS) + { + resource->heap = heap; + resource->heap_offset = heap_offset; + } + else + { + WARN("Failed to bind memory, vr %d.\n", vr); + } + + return hresult_from_vk_result(vr); + +allocate_memory: + FIXME("Allocating device memory.\n"); + return vkd3d_allocate_resource_memory(device, resource, &heap->desc.Properties, heap->desc.Flags); +} + +HRESULT d3d12_placed_resource_create(struct d3d12_device *device, struct d3d12_heap *heap, uint64_t heap_offset, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource) +{ + struct d3d12_resource *object; + HRESULT hr; + + if (FAILED(hr = 
d3d12_resource_create(device, &heap->desc.Properties, heap->desc.Flags, + desc, initial_state, optimized_clear_value, &object))) + return hr; + + if (FAILED(hr = vkd3d_bind_heap_memory(device, object, heap, heap_offset))) + { + d3d12_resource_Release(&object->ID3D12Resource_iface); + return hr; + } + + TRACE("Created placed resource %p.\n", object); + + *resource = object; + + return S_OK; +} + +HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource) +{ + struct d3d12_resource *object; + HRESULT hr; + + if (FAILED(hr = d3d12_resource_create(device, NULL, 0, + desc, initial_state, optimized_clear_value, &object))) + return hr; + + TRACE("Created reserved resource %p.\n", object); + + *resource = object; + + return S_OK; +} + +HRESULT vkd3d_create_image_resource(ID3D12Device *device, + const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) +{ + struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device(device); + struct d3d12_resource *object; + HRESULT hr; + + TRACE("device %p, create_info %p, resource %p.\n", device, create_info, resource); + + if (!create_info || !resource) + return E_INVALIDARG; + if (create_info->type != VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO) + { + WARN("Invalid structure type %#x.\n", create_info->type); + return E_INVALIDARG; + } + if (create_info->next) + WARN("Unhandled next %p.\n", create_info->next); + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + memset(object, 0, sizeof(*object)); + + object->ID3D12Resource_iface.lpVtbl = &d3d12_resource_vtbl; + object->refcount = 1; + object->internal_refcount = 1; + object->desc = create_info->desc; + object->format = vkd3d_format_from_d3d12_resource_desc(d3d12_device, &create_info->desc, 0); + object->u.vk_image = create_info->vk_image; + object->flags = 
VKD3D_RESOURCE_EXTERNAL; + object->flags |= create_info->flags & VKD3D_RESOURCE_PUBLIC_FLAGS; + object->initial_state = D3D12_RESOURCE_STATE_COMMON; + if (create_info->flags & VKD3D_RESOURCE_PRESENT_STATE_TRANSITION) + object->present_state = create_info->present_state; + else + object->present_state = D3D12_RESOURCE_STATE_COMMON; + + if (FAILED(hr = vkd3d_private_store_init(&object->private_store))) + { + vkd3d_free(object); + return hr; + } + + d3d12_device_add_ref(object->device = d3d12_device); + + TRACE("Created resource %p.\n", object); + + *resource = &object->ID3D12Resource_iface; + + return S_OK; +} + +ULONG vkd3d_resource_incref(ID3D12Resource *resource) +{ + TRACE("resource %p.\n", resource); + return d3d12_resource_incref(impl_from_ID3D12Resource(resource)); +} + +ULONG vkd3d_resource_decref(ID3D12Resource *resource) +{ + TRACE("resource %p.\n", resource); + return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); +} + +/* CBVs, SRVs, UAVs */ +static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type) +{ + struct vkd3d_view *view; + + if ((view = vkd3d_malloc(sizeof(*view)))) + { + view->refcount = 1; + view->type = type; + view->vk_counter_view = VK_NULL_HANDLE; + } + return view; +} + +void vkd3d_view_incref(struct vkd3d_view *view) +{ + InterlockedIncrement(&view->refcount); +} + +static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + TRACE("Destroying view %p.\n", view); + + switch (view->type) + { + case VKD3D_VIEW_TYPE_BUFFER: + VK_CALL(vkDestroyBufferView(device->vk_device, view->u.vk_buffer_view, NULL)); + break; + case VKD3D_VIEW_TYPE_IMAGE: + VK_CALL(vkDestroyImageView(device->vk_device, view->u.vk_image_view, NULL)); + break; + case VKD3D_VIEW_TYPE_SAMPLER: + VK_CALL(vkDestroySampler(device->vk_device, view->u.vk_sampler, NULL)); + break; + default: + WARN("Unhandled view type %d.\n", view->type); + } + + if 
(view->vk_counter_view) + VK_CALL(vkDestroyBufferView(device->vk_device, view->vk_counter_view, NULL)); + + vkd3d_free(view); +} + +void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) +{ + if (!InterlockedDecrement(&view->refcount)) + vkd3d_view_destroy(view, device); +} + +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + struct vkd3d_view *defunct_view = NULL; + struct vkd3d_mutex *mutex; + + mutex = d3d12_device_get_descriptor_mutex(device, dst); + vkd3d_mutex_lock(mutex); + + /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ + if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + && !InterlockedDecrement(&dst->u.view->refcount)) + defunct_view = dst->u.view; + + *dst = *src; + + vkd3d_mutex_unlock(mutex); + + /* Destroy the view after unlocking to reduce wait time. */ + if (defunct_view) + vkd3d_view_destroy(defunct_view, device); +} + +static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +{ + static const struct d3d12_desc null_desc = {0}; + + d3d12_desc_write_atomic(descriptor, &null_desc, device); +} + +void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + struct d3d12_desc tmp; + struct vkd3d_mutex *mutex; + + assert(dst != src); + + /* Shadow of the Tomb Raider and possibly other titles sometimes destroy + * and rewrite a descriptor in another thread while it is being copied. 
*/ + mutex = d3d12_device_get_descriptor_mutex(device, src); + vkd3d_mutex_lock(mutex); + + if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + vkd3d_view_incref(src->u.view); + + tmp = *src; + + vkd3d_mutex_unlock(mutex); + + d3d12_desc_write_atomic(dst, &tmp, device); +} + +static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, + const struct vkd3d_format *format) +{ + const VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties; + const struct vkd3d_vulkan_info *vk_info = &device->vk_info; + VkDeviceSize alignment; + + if (vk_info->EXT_texel_buffer_alignment) + { + properties = &vk_info->texel_buffer_alignment_properties; + + alignment = max(properties->storageTexelBufferOffsetAlignmentBytes, + properties->uniformTexelBufferOffsetAlignmentBytes); + + if (properties->storageTexelBufferOffsetSingleTexelAlignment + && properties->uniformTexelBufferOffsetSingleTexelAlignment) + { + assert(!vkd3d_format_is_compressed(format)); + return min(format->byte_count, alignment); + } + + return alignment; + } + + return vk_info->device_limits.minTexelBufferOffsetAlignment; +} + +static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, + VkBuffer vk_buffer, const struct vkd3d_format *format, + VkDeviceSize offset, VkDeviceSize range, VkBufferView *vk_view) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct VkBufferViewCreateInfo view_desc; + VkDeviceSize alignment; + VkResult vr; + + if (vkd3d_format_is_compressed(format)) + { + WARN("Invalid format for buffer view %#x.\n", format->dxgi_format); + return false; + } + + alignment = vkd3d_get_required_texel_buffer_alignment(device, format); + if (offset % alignment) + FIXME("Offset %#"PRIx64" violates the required alignment %#"PRIx64".\n", offset, alignment); + + view_desc.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; + view_desc.pNext = NULL; + view_desc.flags = 0; + view_desc.buffer = vk_buffer; + view_desc.format = 
format->vk_format; + view_desc.offset = offset; + view_desc.range = range; + if ((vr = VK_CALL(vkCreateBufferView(device->vk_device, &view_desc, NULL, vk_view))) < 0) + WARN("Failed to create Vulkan buffer view, vr %d.\n", vr); + return vr == VK_SUCCESS; +} + +bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, + VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_view *object; + VkBufferView vk_view; + + if (!vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view)) + return false; + + if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_BUFFER))) + { + VK_CALL(vkDestroyBufferView(device->vk_device, vk_view, NULL)); + return false; + } + + object->u.vk_buffer_view = vk_view; + object->format = format; + object->info.buffer.offset = offset; + object->info.buffer.size = size; + *view = object; + return true; +} + +#define VKD3D_VIEW_RAW_BUFFER 0x1 + +static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, + struct d3d12_resource *resource, DXGI_FORMAT view_format, + unsigned int offset, unsigned int size, unsigned int structure_stride, + unsigned int flags, struct vkd3d_view **view) +{ + const struct vkd3d_format *format; + VkDeviceSize element_size; + + if (view_format == DXGI_FORMAT_R32_TYPELESS && (flags & VKD3D_VIEW_RAW_BUFFER)) + { + format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + element_size = format->byte_count; + } + else if (view_format == DXGI_FORMAT_UNKNOWN && structure_stride) + { + format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + element_size = structure_stride; + } + else if ((format = vkd3d_format_from_d3d12_resource_desc(device, &resource->desc, view_format))) + { + /* TODO: if view_format is DXGI_FORMAT_UNKNOWN, this is always 1, which + * may not match driver behaviour (return false?). 
*/ + element_size = format->byte_count; + } + else + { + WARN("Failed to find format for %#x.\n", resource->desc.Format); + return false; + } + + assert(d3d12_resource_is_buffer(resource)); + + return vkd3d_create_buffer_view(device, resource->u.vk_buffer, + format, offset * element_size, size * element_size, view); +} + +static void vkd3d_set_view_swizzle_for_format(VkComponentMapping *components, + const struct vkd3d_format *format, bool allowed_swizzle) +{ + components->r = VK_COMPONENT_SWIZZLE_R; + components->g = VK_COMPONENT_SWIZZLE_G; + components->b = VK_COMPONENT_SWIZZLE_B; + components->a = VK_COMPONENT_SWIZZLE_A; + + if (format->vk_aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) + { + if (allowed_swizzle) + { + components->r = VK_COMPONENT_SWIZZLE_ZERO; + components->g = VK_COMPONENT_SWIZZLE_R; + components->b = VK_COMPONENT_SWIZZLE_ZERO; + components->a = VK_COMPONENT_SWIZZLE_ZERO; + } + else + { + FIXME("Stencil swizzle is not supported for format %#x.\n", + format->dxgi_format); + } + } + + if (format->dxgi_format == DXGI_FORMAT_A8_UNORM) + { + if (allowed_swizzle) + { + components->r = VK_COMPONENT_SWIZZLE_ZERO; + components->g = VK_COMPONENT_SWIZZLE_ZERO; + components->b = VK_COMPONENT_SWIZZLE_ZERO; + components->a = VK_COMPONENT_SWIZZLE_R; + } + else + { + FIXME("Alpha swizzle is not supported.\n"); + } + } + + if (format->dxgi_format == DXGI_FORMAT_B8G8R8X8_UNORM + || format->dxgi_format == DXGI_FORMAT_B8G8R8X8_UNORM_SRGB) + { + if (allowed_swizzle) + { + components->r = VK_COMPONENT_SWIZZLE_R; + components->g = VK_COMPONENT_SWIZZLE_G; + components->b = VK_COMPONENT_SWIZZLE_B; + components->a = VK_COMPONENT_SWIZZLE_ONE; + } + else + { + FIXME("B8G8R8X8 swizzle is not supported.\n"); + } + } +} + +static VkComponentSwizzle vk_component_swizzle_from_d3d12(unsigned int component_mapping, + unsigned int component_index) +{ + D3D12_SHADER_COMPONENT_MAPPING mapping + = D3D12_DECODE_SHADER_4_COMPONENT_MAPPING(component_index, component_mapping); + + switch 
(mapping) + { + case D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0: + return VK_COMPONENT_SWIZZLE_R; + case D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1: + return VK_COMPONENT_SWIZZLE_G; + case D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2: + return VK_COMPONENT_SWIZZLE_B; + case D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3: + return VK_COMPONENT_SWIZZLE_A; + case D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0: + return VK_COMPONENT_SWIZZLE_ZERO; + case D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1: + return VK_COMPONENT_SWIZZLE_ONE; + } + + FIXME("Invalid component mapping %#x.\n", mapping); + return VK_COMPONENT_SWIZZLE_IDENTITY; +} + +static void vk_component_mapping_from_d3d12(VkComponentMapping *components, + unsigned int component_mapping) +{ + components->r = vk_component_swizzle_from_d3d12(component_mapping, 0); + components->g = vk_component_swizzle_from_d3d12(component_mapping, 1); + components->b = vk_component_swizzle_from_d3d12(component_mapping, 2); + components->a = vk_component_swizzle_from_d3d12(component_mapping, 3); +} + +static VkComponentSwizzle swizzle_vk_component(const VkComponentMapping *components, + VkComponentSwizzle component, VkComponentSwizzle swizzle) +{ + switch (swizzle) + { + case VK_COMPONENT_SWIZZLE_IDENTITY: + break; + + case VK_COMPONENT_SWIZZLE_R: + component = components->r; + break; + + case VK_COMPONENT_SWIZZLE_G: + component = components->g; + break; + + case VK_COMPONENT_SWIZZLE_B: + component = components->b; + break; + + case VK_COMPONENT_SWIZZLE_A: + component = components->a; + break; + + case VK_COMPONENT_SWIZZLE_ONE: + case VK_COMPONENT_SWIZZLE_ZERO: + component = swizzle; + break; + + default: + FIXME("Invalid component swizzle %#x.\n", swizzle); + break; + } + + assert(component != VK_COMPONENT_SWIZZLE_IDENTITY); + return component; +} + +static void vk_component_mapping_compose(VkComponentMapping *dst, const VkComponentMapping *b) +{ + const VkComponentMapping a = *dst; + + 
dst->r = swizzle_vk_component(&a, a.r, b->r); + dst->g = swizzle_vk_component(&a, a.g, b->g); + dst->b = swizzle_vk_component(&a, a.b, b->b); + dst->a = swizzle_vk_component(&a, a.a, b->a); +} + +static bool init_default_texture_view_desc(struct vkd3d_texture_view_desc *desc, + struct d3d12_resource *resource, DXGI_FORMAT view_format) +{ + const struct d3d12_device *device = resource->device; + + if (view_format == resource->desc.Format) + { + desc->format = resource->format; + } + else if (!(desc->format = vkd3d_format_from_d3d12_resource_desc(device, &resource->desc, view_format))) + { + FIXME("Failed to find format (resource format %#x, view format %#x).\n", + resource->desc.Format, view_format); + return false; + } + + desc->miplevel_idx = 0; + desc->miplevel_count = 1; + desc->layer_idx = 0; + desc->layer_count = d3d12_resource_desc_get_layer_count(&resource->desc); + desc->vk_image_aspect = desc->format->vk_aspect_mask; + + switch (resource->desc.Dimension) + { + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: + desc->view_type = resource->desc.DepthOrArraySize > 1 + ? VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D; + break; + + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + desc->view_type = resource->desc.DepthOrArraySize > 1 + ? 
VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; + break; + + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: + desc->view_type = VK_IMAGE_VIEW_TYPE_3D; + desc->layer_count = 1; + break; + + default: + FIXME("Resource dimension %#x not implemented.\n", resource->desc.Dimension); + return false; + } + + desc->components.r = VK_COMPONENT_SWIZZLE_IDENTITY; + desc->components.g = VK_COMPONENT_SWIZZLE_IDENTITY; + desc->components.b = VK_COMPONENT_SWIZZLE_IDENTITY; + desc->components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + desc->allowed_swizzle = false; + return true; +} + +static void vkd3d_texture_view_desc_normalise(struct vkd3d_texture_view_desc *desc, + const D3D12_RESOURCE_DESC *resource_desc) +{ + unsigned int max_layer_count; + + if (resource_desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) + { + if (desc->view_type == VK_IMAGE_VIEW_TYPE_2D_ARRAY) + max_layer_count = max(1, resource_desc->DepthOrArraySize >> desc->miplevel_idx); + else + max_layer_count = 1; + } + else + { + max_layer_count = resource_desc->DepthOrArraySize; + } + + if (desc->layer_idx >= max_layer_count) + { + WARN("Layer index %u exceeds maximum available layer %u.\n", desc->layer_idx, max_layer_count - 1); + desc->layer_count = 1; + return; + } + + max_layer_count -= desc->layer_idx; + if (desc->layer_count <= max_layer_count) + return; + + if (desc->layer_count != UINT_MAX) + WARN("Layer count %u exceeds maximum %u.\n", desc->layer_count, max_layer_count); + desc->layer_count = max_layer_count; +} + +bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, + const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + const struct vkd3d_format *format = desc->format; + struct VkImageViewCreateInfo view_desc; + struct vkd3d_view *object; + VkImageView vk_view; + VkResult vr; + + view_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_desc.pNext = NULL; + view_desc.flags = 0; + 
view_desc.image = vk_image; + view_desc.viewType = desc->view_type; + view_desc.format = format->vk_format; + vkd3d_set_view_swizzle_for_format(&view_desc.components, format, desc->allowed_swizzle); + if (desc->allowed_swizzle) + vk_component_mapping_compose(&view_desc.components, &desc->components); + view_desc.subresourceRange.aspectMask = desc->vk_image_aspect; + view_desc.subresourceRange.baseMipLevel = desc->miplevel_idx; + view_desc.subresourceRange.levelCount = desc->miplevel_count; + view_desc.subresourceRange.baseArrayLayer = desc->layer_idx; + view_desc.subresourceRange.layerCount = desc->layer_count; + if ((vr = VK_CALL(vkCreateImageView(device->vk_device, &view_desc, NULL, &vk_view))) < 0) + { + WARN("Failed to create Vulkan image view, vr %d.\n", vr); + return false; + } + + if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_IMAGE))) + { + VK_CALL(vkDestroyImageView(device->vk_device, vk_view, NULL)); + return false; + } + + object->u.vk_image_view = vk_view; + object->format = format; + object->info.texture.vk_view_type = desc->view_type; + object->info.texture.miplevel_idx = desc->miplevel_idx; + object->info.texture.layer_idx = desc->layer_idx; + object->info.texture.layer_count = desc->layer_count; + *view = object; + return true; +} + +void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, + struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) +{ + struct VkDescriptorBufferInfo *buffer_info; + struct d3d12_resource *resource; + + if (!desc) + { + WARN("Constant buffer desc is NULL.\n"); + return; + } + + if (desc->SizeInBytes & (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) + { + WARN("Size is not %u bytes aligned.\n", D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + return; + } + + buffer_info = &descriptor->u.vk_cbv_info; + if (desc->BufferLocation) + { + resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, desc->BufferLocation); + buffer_info->buffer = resource->u.vk_buffer; + 
buffer_info->offset = desc->BufferLocation - resource->gpu_address; + buffer_info->range = min(desc->SizeInBytes, resource->desc.Width - buffer_info->offset); + } + else + { + /* NULL descriptor */ + buffer_info->buffer = device->null_resources.vk_buffer; + buffer_info->offset = 0; + buffer_info->range = VKD3D_NULL_BUFFER_SIZE; + } + + descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_CBV; + descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; +} + +static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) +{ + if (flags == D3D12_BUFFER_SRV_FLAG_RAW) + return VKD3D_VIEW_RAW_BUFFER; + if (flags) + FIXME("Unhandled buffer SRV flags %#x.\n", flags); + return 0; +} + +static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, + struct d3d12_device *device, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) +{ + struct vkd3d_null_resources *null_resources = &device->null_resources; + struct vkd3d_texture_view_desc vkd3d_desc; + struct vkd3d_view *view; + VkImage vk_image; + + if (!desc) + { + WARN("D3D12_SHADER_RESOURCE_VIEW_DESC is required for NULL view.\n"); + return; + } + + switch (desc->ViewDimension) + { + case D3D12_SRV_DIMENSION_BUFFER: + WARN("Creating NULL buffer SRV %#x.\n", desc->Format); + + if (vkd3d_create_buffer_view(device, null_resources->vk_buffer, + vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), + 0, VKD3D_NULL_BUFFER_SIZE, &view)) + { + descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; + descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + descriptor->u.view = view; + } + return; + + case D3D12_SRV_DIMENSION_TEXTURE2D: + vk_image = null_resources->vk_2d_image; + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; + break; + case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: + vk_image = null_resources->vk_2d_image; + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + break; + + default: + FIXME("Unhandled view dimension %#x.\n", desc->ViewDimension); + return; + } + + WARN("Creating NULL 
SRV %#x.\n", desc->ViewDimension); + + vkd3d_desc.format = vkd3d_get_format(device, VKD3D_NULL_VIEW_FORMAT, false); + vkd3d_desc.miplevel_idx = 0; + vkd3d_desc.miplevel_count = 1; + vkd3d_desc.layer_idx = 0; + vkd3d_desc.layer_count = 1; + vkd3d_desc.components.r = VK_COMPONENT_SWIZZLE_ZERO; + vkd3d_desc.components.g = VK_COMPONENT_SWIZZLE_ZERO; + vkd3d_desc.components.b = VK_COMPONENT_SWIZZLE_ZERO; + vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_ZERO; + vkd3d_desc.allowed_swizzle = true; + + if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) + return; + + descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; + descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor->u.view = view; +} + +static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, + struct d3d12_device *device, struct d3d12_resource *resource, + const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) +{ + struct vkd3d_view *view; + unsigned int flags; + + if (!desc) + { + FIXME("Default SRV views not supported.\n"); + return; + } + + if (desc->ViewDimension != D3D12_SRV_DIMENSION_BUFFER) + { + WARN("Unexpected view dimension %#x.\n", desc->ViewDimension); + return; + } + + flags = vkd3d_view_flags_from_d3d12_buffer_srv_flags(desc->u.Buffer.Flags); + if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, + desc->u.Buffer.StructureByteStride, flags, &view)) + return; + + descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; + descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + descriptor->u.view = view; +} + +static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format, + unsigned int plane_slice) +{ + VkImageAspectFlags aspect_flags = format->vk_aspect_mask; + unsigned int i; + + /* For all formats we currently handle, the n-th aspect bit in Vulkan, if the lowest bit set is + * n = 0, corresponds to the n-th plane in D3D12, so 
clear the lowest bit for each slice skipped. */ + for (i = 0; i < plane_slice; i++) + aspect_flags &= aspect_flags - 1; + + if (!aspect_flags) + { + WARN("Invalid plane slice %u for format %#x.\n", plane_slice, format->vk_format); + aspect_flags = format->vk_aspect_mask; + } + + /* The selected slice is now the lowest bit in the aspect flags, so clear the others. */ + return aspect_flags & -aspect_flags; +} + +void d3d12_desc_create_srv(struct d3d12_desc *descriptor, + struct d3d12_device *device, struct d3d12_resource *resource, + const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) +{ + struct vkd3d_texture_view_desc vkd3d_desc; + struct vkd3d_view *view; + + if (!resource) + { + vkd3d_create_null_srv(descriptor, device, desc); + return; + } + + if (d3d12_resource_is_buffer(resource)) + { + vkd3d_create_buffer_srv(descriptor, device, resource, desc); + return; + } + + if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) + return; + + vkd3d_desc.miplevel_count = VK_REMAINING_MIP_LEVELS; + vkd3d_desc.allowed_swizzle = true; + + if (desc) + { + if (desc->Shader4ComponentMapping != D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING) + { + TRACE("Component mapping %s for format %#x.\n", + debug_d3d12_shader_component_mapping(desc->Shader4ComponentMapping), desc->Format); + + vk_component_mapping_from_d3d12(&vkd3d_desc.components, desc->Shader4ComponentMapping); + } + + switch (desc->ViewDimension) + { + case D3D12_SRV_DIMENSION_TEXTURE2D: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; + vkd3d_desc.miplevel_idx = desc->u.Texture2D.MostDetailedMip; + vkd3d_desc.miplevel_count = desc->u.Texture2D.MipLevels; + if (desc->u.Texture2D.PlaneSlice) + vkd3d_desc.vk_image_aspect = vk_image_aspect_flags_from_d3d12_plane_slice(resource->format, + desc->u.Texture2D.PlaneSlice); + if (desc->u.Texture2D.ResourceMinLODClamp) + FIXME("Unhandled min LOD clamp %.8e.\n", desc->u.Texture2D.ResourceMinLODClamp); + break; + case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: + 
vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + vkd3d_desc.miplevel_idx = desc->u.Texture2DArray.MostDetailedMip; + vkd3d_desc.miplevel_count = desc->u.Texture2DArray.MipLevels; + vkd3d_desc.layer_idx = desc->u.Texture2DArray.FirstArraySlice; + vkd3d_desc.layer_count = desc->u.Texture2DArray.ArraySize; + if (desc->u.Texture2DArray.PlaneSlice) + vkd3d_desc.vk_image_aspect = vk_image_aspect_flags_from_d3d12_plane_slice(resource->format, + desc->u.Texture2DArray.PlaneSlice); + if (desc->u.Texture2DArray.ResourceMinLODClamp) + FIXME("Unhandled min LOD clamp %.8e.\n", desc->u.Texture2DArray.ResourceMinLODClamp); + vkd3d_texture_view_desc_normalise(&vkd3d_desc, &resource->desc); + break; + case D3D12_SRV_DIMENSION_TEXTURE2DMS: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; + break; + case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + vkd3d_desc.layer_idx = desc->u.Texture2DMSArray.FirstArraySlice; + vkd3d_desc.layer_count = desc->u.Texture2DMSArray.ArraySize; + vkd3d_texture_view_desc_normalise(&vkd3d_desc, &resource->desc); + break; + case D3D12_SRV_DIMENSION_TEXTURE3D: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_3D; + vkd3d_desc.miplevel_idx = desc->u.Texture3D.MostDetailedMip; + vkd3d_desc.miplevel_count = desc->u.Texture3D.MipLevels; + if (desc->u.Texture3D.ResourceMinLODClamp) + FIXME("Unhandled min LOD clamp %.8e.\n", desc->u.Texture3D.ResourceMinLODClamp); + break; + case D3D12_SRV_DIMENSION_TEXTURECUBE: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_CUBE; + vkd3d_desc.miplevel_idx = desc->u.TextureCube.MostDetailedMip; + vkd3d_desc.miplevel_count = desc->u.TextureCube.MipLevels; + vkd3d_desc.layer_count = 6; + if (desc->u.TextureCube.ResourceMinLODClamp) + FIXME("Unhandled min LOD clamp %.8e.\n", desc->u.TextureCube.ResourceMinLODClamp); + break; + case D3D12_SRV_DIMENSION_TEXTURECUBEARRAY: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + vkd3d_desc.miplevel_idx = 
desc->u.TextureCubeArray.MostDetailedMip; + vkd3d_desc.miplevel_count = desc->u.TextureCubeArray.MipLevels; + vkd3d_desc.layer_idx = desc->u.TextureCubeArray.First2DArrayFace; + vkd3d_desc.layer_count = desc->u.TextureCubeArray.NumCubes; + if (vkd3d_desc.layer_count != UINT_MAX) + vkd3d_desc.layer_count *= 6; + if (desc->u.TextureCubeArray.ResourceMinLODClamp) + FIXME("Unhandled min LOD clamp %.8e.\n", desc->u.TextureCubeArray.ResourceMinLODClamp); + vkd3d_texture_view_desc_normalise(&vkd3d_desc, &resource->desc); + break; + default: + FIXME("Unhandled view dimension %#x.\n", desc->ViewDimension); + } + } + + if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) + return; + + descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; + descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor->u.view = view; +} + +static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) +{ + if (flags == D3D12_BUFFER_UAV_FLAG_RAW) + return VKD3D_VIEW_RAW_BUFFER; + if (flags) + FIXME("Unhandled buffer UAV flags %#x.\n", flags); + return 0; +} + +static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, + struct d3d12_device *device, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) +{ + struct vkd3d_null_resources *null_resources = &device->null_resources; + struct vkd3d_texture_view_desc vkd3d_desc; + struct vkd3d_view *view; + VkImage vk_image; + + if (!desc) + { + WARN("View desc is required for NULL view.\n"); + return; + } + + switch (desc->ViewDimension) + { + case D3D12_UAV_DIMENSION_BUFFER: + WARN("Creating NULL buffer UAV %#x.\n", desc->Format); + + if (vkd3d_create_buffer_view(device, null_resources->vk_storage_buffer, + vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), + 0, VKD3D_NULL_BUFFER_SIZE, &view)) + { + descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; + descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + descriptor->u.view = view; + } + return; + + case 
D3D12_UAV_DIMENSION_TEXTURE2D: + vk_image = null_resources->vk_2d_storage_image; + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; + break; + case D3D12_UAV_DIMENSION_TEXTURE2DARRAY: + vk_image = null_resources->vk_2d_storage_image; + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + break; + + default: + FIXME("Unhandled view dimension %#x.\n", desc->ViewDimension); + return; + } + + WARN("Creating NULL UAV %#x.\n", desc->ViewDimension); + + vkd3d_desc.format = vkd3d_get_format(device, VKD3D_NULL_VIEW_FORMAT, false); + vkd3d_desc.miplevel_idx = 0; + vkd3d_desc.miplevel_count = 1; + vkd3d_desc.layer_idx = 0; + vkd3d_desc.layer_count = 1; + vkd3d_desc.components.r = VK_COMPONENT_SWIZZLE_R; + vkd3d_desc.components.g = VK_COMPONENT_SWIZZLE_G; + vkd3d_desc.components.b = VK_COMPONENT_SWIZZLE_B; + vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_A; + vkd3d_desc.allowed_swizzle = false; + + if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) + return; + + descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; + descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + descriptor->u.view = view; +} + +static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, + struct d3d12_resource *resource, struct d3d12_resource *counter_resource, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) +{ + struct vkd3d_view *view; + unsigned int flags; + + if (!desc) + { + FIXME("Default UAV views not supported.\n"); + return; + } + + if (desc->ViewDimension != D3D12_UAV_DIMENSION_BUFFER) + { + WARN("Unexpected view dimension %#x.\n", desc->ViewDimension); + return; + } + + flags = vkd3d_view_flags_from_d3d12_buffer_uav_flags(desc->u.Buffer.Flags); + if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, + desc->u.Buffer.StructureByteStride, flags, &view)) + return; + + descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; + descriptor->vk_descriptor_type = 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + descriptor->u.view = view; + + if (counter_resource) + { + const struct vkd3d_format *format; + + assert(d3d12_resource_is_buffer(counter_resource)); + assert(desc->u.Buffer.StructureByteStride); + + format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, + desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view)) + { + WARN("Failed to create counter buffer view.\n"); + view->vk_counter_view = VK_NULL_HANDLE; + d3d12_desc_destroy(descriptor, device); + } + } +} + +static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, + struct d3d12_device *device, struct d3d12_resource *resource, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) +{ + struct vkd3d_texture_view_desc vkd3d_desc; + struct vkd3d_view *view; + + if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) + return; + + if (vkd3d_format_is_compressed(vkd3d_desc.format)) + { + WARN("UAVs cannot be created for compressed formats.\n"); + return; + } + + if (desc) + { + switch (desc->ViewDimension) + { + case D3D12_UAV_DIMENSION_TEXTURE2D: + vkd3d_desc.miplevel_idx = desc->u.Texture2D.MipSlice; + if (desc->u.Texture2D.PlaneSlice) + vkd3d_desc.vk_image_aspect = vk_image_aspect_flags_from_d3d12_plane_slice(resource->format, + desc->u.Texture2D.PlaneSlice); + break; + case D3D12_UAV_DIMENSION_TEXTURE2DARRAY: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + vkd3d_desc.miplevel_idx = desc->u.Texture2DArray.MipSlice; + vkd3d_desc.layer_idx = desc->u.Texture2DArray.FirstArraySlice; + vkd3d_desc.layer_count = desc->u.Texture2DArray.ArraySize; + if (desc->u.Texture2DArray.PlaneSlice) + vkd3d_desc.vk_image_aspect = vk_image_aspect_flags_from_d3d12_plane_slice(resource->format, + desc->u.Texture2DArray.PlaneSlice); + vkd3d_texture_view_desc_normalise(&vkd3d_desc, &resource->desc); + break; + case 
D3D12_UAV_DIMENSION_TEXTURE3D: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_3D; + vkd3d_desc.miplevel_idx = desc->u.Texture3D.MipSlice; + if (desc->u.Texture3D.FirstWSlice || (desc->u.Texture3D.WSize != UINT_MAX + && desc->u.Texture3D.WSize != max(1u, + resource->desc.DepthOrArraySize >> desc->u.Texture3D.MipSlice))) + FIXME("Unhandled depth view %u-%u.\n", + desc->u.Texture3D.FirstWSlice, desc->u.Texture3D.WSize); + break; + default: + FIXME("Unhandled view dimension %#x.\n", desc->ViewDimension); + } + } + + if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) + return; + + descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; + descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + descriptor->u.view = view; +} + +void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, + struct d3d12_resource *resource, struct d3d12_resource *counter_resource, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) +{ + if (!resource) + { + if (counter_resource) + FIXME("Ignoring counter resource %p.\n", counter_resource); + vkd3d_create_null_uav(descriptor, device, desc); + return; + } + + if (d3d12_resource_is_buffer(resource)) + { + vkd3d_create_buffer_uav(descriptor, device, resource, counter_resource, desc); + } + else + { + if (counter_resource) + FIXME("Unexpected counter resource for texture view.\n"); + vkd3d_create_texture_uav(descriptor, device, resource, desc); + } +} + +bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, + D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view) +{ + const struct vkd3d_format *format; + struct d3d12_resource *resource; + + format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); + assert(d3d12_resource_is_buffer(resource)); + return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, + gpu_address - resource->gpu_address, VK_WHOLE_SIZE, 
vk_buffer_view); +} + +/* samplers */ +static VkFilter vk_filter_from_d3d12(D3D12_FILTER_TYPE type) +{ + switch (type) + { + case D3D12_FILTER_TYPE_POINT: + return VK_FILTER_NEAREST; + case D3D12_FILTER_TYPE_LINEAR: + return VK_FILTER_LINEAR; + default: + FIXME("Unhandled filter type %#x.\n", type); + return VK_FILTER_NEAREST; + } +} + +static VkSamplerMipmapMode vk_mipmap_mode_from_d3d12(D3D12_FILTER_TYPE type) +{ + switch (type) + { + case D3D12_FILTER_TYPE_POINT: + return VK_SAMPLER_MIPMAP_MODE_NEAREST; + case D3D12_FILTER_TYPE_LINEAR: + return VK_SAMPLER_MIPMAP_MODE_LINEAR; + default: + FIXME("Unhandled filter type %#x.\n", type); + return VK_SAMPLER_MIPMAP_MODE_NEAREST; + } +} + +static VkSamplerAddressMode vk_address_mode_from_d3d12(const struct d3d12_device *device, + D3D12_TEXTURE_ADDRESS_MODE mode) +{ + switch (mode) + { + case D3D12_TEXTURE_ADDRESS_MODE_WRAP: + return VK_SAMPLER_ADDRESS_MODE_REPEAT; + case D3D12_TEXTURE_ADDRESS_MODE_MIRROR: + return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + case D3D12_TEXTURE_ADDRESS_MODE_CLAMP: + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case D3D12_TEXTURE_ADDRESS_MODE_BORDER: + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + case D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE: + if (device->vk_info.KHR_sampler_mirror_clamp_to_edge) + return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + /* Fall through */ + default: + FIXME("Unhandled address mode %#x.\n", mode); + return VK_SAMPLER_ADDRESS_MODE_REPEAT; + } +} + +static VkResult d3d12_create_sampler(struct d3d12_device *device, D3D12_FILTER filter, + D3D12_TEXTURE_ADDRESS_MODE address_u, D3D12_TEXTURE_ADDRESS_MODE address_v, + D3D12_TEXTURE_ADDRESS_MODE address_w, float mip_lod_bias, unsigned int max_anisotropy, + D3D12_COMPARISON_FUNC comparison_func, float min_lod, float max_lod, + VkSampler *vk_sampler) +{ + const struct vkd3d_vk_device_procs *vk_procs; + struct VkSamplerCreateInfo sampler_desc; + VkResult vr; + + vk_procs = &device->vk_procs; + + if 
(D3D12_DECODE_FILTER_REDUCTION(filter) == D3D12_FILTER_REDUCTION_TYPE_MINIMUM + || D3D12_DECODE_FILTER_REDUCTION(filter) == D3D12_FILTER_REDUCTION_TYPE_MAXIMUM) + FIXME("Min/max reduction mode not supported.\n"); + + sampler_desc.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_desc.pNext = NULL; + sampler_desc.flags = 0; + sampler_desc.magFilter = vk_filter_from_d3d12(D3D12_DECODE_MAG_FILTER(filter)); + sampler_desc.minFilter = vk_filter_from_d3d12(D3D12_DECODE_MIN_FILTER(filter)); + sampler_desc.mipmapMode = vk_mipmap_mode_from_d3d12(D3D12_DECODE_MIP_FILTER(filter)); + sampler_desc.addressModeU = vk_address_mode_from_d3d12(device, address_u); + sampler_desc.addressModeV = vk_address_mode_from_d3d12(device, address_v); + sampler_desc.addressModeW = vk_address_mode_from_d3d12(device, address_w); + sampler_desc.mipLodBias = mip_lod_bias; + sampler_desc.anisotropyEnable = D3D12_DECODE_IS_ANISOTROPIC_FILTER(filter); + sampler_desc.maxAnisotropy = max_anisotropy; + sampler_desc.compareEnable = D3D12_DECODE_IS_COMPARISON_FILTER(filter); + sampler_desc.compareOp = sampler_desc.compareEnable ? 
vk_compare_op_from_d3d12(comparison_func) : 0; + sampler_desc.minLod = min_lod; + sampler_desc.maxLod = max_lod; + sampler_desc.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + sampler_desc.unnormalizedCoordinates = VK_FALSE; + if ((vr = VK_CALL(vkCreateSampler(device->vk_device, &sampler_desc, NULL, vk_sampler))) < 0) + WARN("Failed to create Vulkan sampler, vr %d.\n", vr); + + return vr; +} + +void d3d12_desc_create_sampler(struct d3d12_desc *sampler, + struct d3d12_device *device, const D3D12_SAMPLER_DESC *desc) +{ + struct vkd3d_view *view; + + if (!desc) + { + WARN("NULL sampler desc.\n"); + return; + } + + if (desc->AddressU == D3D12_TEXTURE_ADDRESS_MODE_BORDER + || desc->AddressV == D3D12_TEXTURE_ADDRESS_MODE_BORDER + || desc->AddressW == D3D12_TEXTURE_ADDRESS_MODE_BORDER) + FIXME("Ignoring border color {%.8e, %.8e, %.8e, %.8e}.\n", + desc->BorderColor[0], desc->BorderColor[1], desc->BorderColor[2], desc->BorderColor[3]); + + if (!(view = vkd3d_view_create(VKD3D_VIEW_TYPE_SAMPLER))) + return; + + if (d3d12_create_sampler(device, desc->Filter, desc->AddressU, + desc->AddressV, desc->AddressW, desc->MipLODBias, desc->MaxAnisotropy, + desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->u.vk_sampler) < 0) + { + vkd3d_free(view); + return; + } + + sampler->magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER; + sampler->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER; + sampler->u.view = view; +} + +HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, + const D3D12_STATIC_SAMPLER_DESC *desc, VkSampler *vk_sampler) +{ + VkResult vr; + + if (desc->AddressU == D3D12_TEXTURE_ADDRESS_MODE_BORDER + || desc->AddressV == D3D12_TEXTURE_ADDRESS_MODE_BORDER + || desc->AddressW == D3D12_TEXTURE_ADDRESS_MODE_BORDER) + FIXME("Ignoring border %#x.\n", desc->BorderColor); + + vr = d3d12_create_sampler(device, desc->Filter, desc->AddressU, + desc->AddressV, desc->AddressW, desc->MipLODBias, desc->MaxAnisotropy, + desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, 
vk_sampler); + return hresult_from_vk_result(vr); +} + +/* RTVs */ +static void d3d12_rtv_desc_destroy(struct d3d12_rtv_desc *rtv, struct d3d12_device *device) +{ + if (rtv->magic != VKD3D_DESCRIPTOR_MAGIC_RTV) + return; + + vkd3d_view_decref(rtv->view, device); + memset(rtv, 0, sizeof(*rtv)); +} + +void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_device *device, + struct d3d12_resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc) +{ + struct vkd3d_texture_view_desc vkd3d_desc; + struct vkd3d_view *view; + + d3d12_rtv_desc_destroy(rtv_desc, device); + + if (!resource) + { + FIXME("NULL resource RTV not implemented.\n"); + return; + } + + if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) + return; + + if (vkd3d_desc.format->vk_aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT) + { + WARN("Trying to create RTV for depth/stencil format %#x.\n", vkd3d_desc.format->dxgi_format); + return; + } + + if (desc) + { + switch (desc->ViewDimension) + { + case D3D12_RTV_DIMENSION_TEXTURE2D: + vkd3d_desc.miplevel_idx = desc->u.Texture2D.MipSlice; + if (desc->u.Texture2D.PlaneSlice) + vkd3d_desc.vk_image_aspect = vk_image_aspect_flags_from_d3d12_plane_slice(resource->format, + desc->u.Texture2D.PlaneSlice); + break; + case D3D12_RTV_DIMENSION_TEXTURE2DARRAY: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + vkd3d_desc.miplevel_idx = desc->u.Texture2DArray.MipSlice; + vkd3d_desc.layer_idx = desc->u.Texture2DArray.FirstArraySlice; + vkd3d_desc.layer_count = desc->u.Texture2DArray.ArraySize; + if (desc->u.Texture2DArray.PlaneSlice) + vkd3d_desc.vk_image_aspect = vk_image_aspect_flags_from_d3d12_plane_slice(resource->format, + desc->u.Texture2DArray.PlaneSlice); + vkd3d_texture_view_desc_normalise(&vkd3d_desc, &resource->desc); + break; + case D3D12_RTV_DIMENSION_TEXTURE2DMS: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; + break; + case D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY: + vkd3d_desc.view_type = 
VK_IMAGE_VIEW_TYPE_2D_ARRAY; + vkd3d_desc.layer_idx = desc->u.Texture2DMSArray.FirstArraySlice; + vkd3d_desc.layer_count = desc->u.Texture2DMSArray.ArraySize; + vkd3d_texture_view_desc_normalise(&vkd3d_desc, &resource->desc); + break; + case D3D12_RTV_DIMENSION_TEXTURE3D: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + vkd3d_desc.miplevel_idx = desc->u.Texture3D.MipSlice; + vkd3d_desc.layer_idx = desc->u.Texture3D.FirstWSlice; + vkd3d_desc.layer_count = desc->u.Texture3D.WSize; + vkd3d_texture_view_desc_normalise(&vkd3d_desc, &resource->desc); + break; + default: + FIXME("Unhandled view dimension %#x.\n", desc->ViewDimension); + } + } + else if (resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) + { + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + vkd3d_desc.layer_idx = 0; + vkd3d_desc.layer_count = resource->desc.DepthOrArraySize; + } + + assert(d3d12_resource_is_texture(resource)); + + if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) + return; + + rtv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_RTV; + rtv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); + rtv_desc->format = vkd3d_desc.format; + rtv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); + rtv_desc->height = d3d12_resource_desc_get_height(&resource->desc, vkd3d_desc.miplevel_idx); + rtv_desc->layer_count = vkd3d_desc.layer_count; + rtv_desc->view = view; + rtv_desc->resource = resource; +} + +/* DSVs */ +static void d3d12_dsv_desc_destroy(struct d3d12_dsv_desc *dsv, struct d3d12_device *device) +{ + if (dsv->magic != VKD3D_DESCRIPTOR_MAGIC_DSV) + return; + + vkd3d_view_decref(dsv->view, device); + memset(dsv, 0, sizeof(*dsv)); +} + +void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, + struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc) +{ + struct vkd3d_texture_view_desc vkd3d_desc; + struct vkd3d_view *view; + + 
d3d12_dsv_desc_destroy(dsv_desc, device); + + if (!resource) + { + FIXME("NULL resource DSV not implemented.\n"); + return; + } + + if (resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) + { + WARN("Cannot create DSV for 3D texture.\n"); + return; + } + + if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) + return; + + if (!(vkd3d_desc.format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) + { + WARN("Trying to create DSV for format %#x.\n", vkd3d_desc.format->dxgi_format); + return; + } + + if (desc) + { + if (desc->Flags) + FIXME("Ignoring flags %#x.\n", desc->Flags); + + switch (desc->ViewDimension) + { + case D3D12_DSV_DIMENSION_TEXTURE2D: + vkd3d_desc.miplevel_idx = desc->u.Texture2D.MipSlice; + break; + case D3D12_DSV_DIMENSION_TEXTURE2DARRAY: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + vkd3d_desc.miplevel_idx = desc->u.Texture2DArray.MipSlice; + vkd3d_desc.layer_idx = desc->u.Texture2DArray.FirstArraySlice; + vkd3d_desc.layer_count = desc->u.Texture2DArray.ArraySize; + vkd3d_texture_view_desc_normalise(&vkd3d_desc, &resource->desc); + break; + case D3D12_DSV_DIMENSION_TEXTURE2DMS: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; + break; + case D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + vkd3d_desc.layer_idx = desc->u.Texture2DMSArray.FirstArraySlice; + vkd3d_desc.layer_count = desc->u.Texture2DMSArray.ArraySize; + vkd3d_texture_view_desc_normalise(&vkd3d_desc, &resource->desc); + break; + default: + FIXME("Unhandled view dimension %#x.\n", desc->ViewDimension); + } + } + + assert(d3d12_resource_is_texture(resource)); + + if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) + return; + + dsv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_DSV; + dsv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); + dsv_desc->format = vkd3d_desc.format; + dsv_desc->width = 
d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); + dsv_desc->height = d3d12_resource_desc_get_height(&resource->desc, vkd3d_desc.miplevel_idx); + dsv_desc->layer_count = vkd3d_desc.layer_count; + dsv_desc->view = view; + dsv_desc->resource = resource; +} + +/* ID3D12DescriptorHeap */ +static inline struct d3d12_descriptor_heap *impl_from_ID3D12DescriptorHeap(ID3D12DescriptorHeap *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_descriptor_heap, ID3D12DescriptorHeap_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_descriptor_heap_QueryInterface(ID3D12DescriptorHeap *iface, + REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + if (IsEqualGUID(riid, &IID_ID3D12DescriptorHeap) + || IsEqualGUID(riid, &IID_ID3D12Pageable) + || IsEqualGUID(riid, &IID_ID3D12DeviceChild) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { + ID3D12DescriptorHeap_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_AddRef(ID3D12DescriptorHeap *iface) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + ULONG refcount = InterlockedIncrement(&heap->refcount); + + TRACE("%p increasing refcount to %u.\n", heap, refcount); + + return refcount; +} + +static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHeap *iface) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + ULONG refcount = InterlockedDecrement(&heap->refcount); + + TRACE("%p decreasing refcount to %u.\n", heap, refcount); + + if (!refcount) + { + struct d3d12_device *device = heap->device; + unsigned int i; + + vkd3d_private_store_destroy(&heap->private_store); + + switch (heap->desc.Type) + { + case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV: + 
case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER: + { + struct d3d12_desc *descriptors = (struct d3d12_desc *)heap->descriptors; + + for (i = 0; i < heap->desc.NumDescriptors; ++i) + { + d3d12_desc_destroy(&descriptors[i], device); + } + + if (device->vk_info.EXT_descriptor_indexing && !vkd3d_gpu_descriptor_allocator_unregister_range( + &device->gpu_descriptor_allocator, descriptors)) + ERR("Failed to unregister descriptor range.\n"); + break; + } + + case D3D12_DESCRIPTOR_HEAP_TYPE_RTV: + { + struct d3d12_rtv_desc *rtvs = (struct d3d12_rtv_desc *)heap->descriptors; + + for (i = 0; i < heap->desc.NumDescriptors; ++i) + { + d3d12_rtv_desc_destroy(&rtvs[i], device); + } + break; + } + + case D3D12_DESCRIPTOR_HEAP_TYPE_DSV: + { + struct d3d12_dsv_desc *dsvs = (struct d3d12_dsv_desc *)heap->descriptors; + + for (i = 0; i < heap->desc.NumDescriptors; ++i) + { + d3d12_dsv_desc_destroy(&dsvs[i], device); + } + break; + } + + default: + break; + } + + vkd3d_free(heap); + + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_descriptor_heap_GetPrivateData(ID3D12DescriptorHeap *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&heap->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_descriptor_heap_SetPrivateData(ID3D12DescriptorHeap *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&heap->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_descriptor_heap_SetPrivateDataInterface(ID3D12DescriptorHeap *iface, + REFGUID guid, const 
IUnknown *data) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&heap->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_descriptor_heap_SetName(ID3D12DescriptorHeap *iface, const WCHAR *name) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, heap->device->wchar_size)); + + return name ? S_OK : E_INVALIDARG; +} + +static HRESULT STDMETHODCALLTYPE d3d12_descriptor_heap_GetDevice(ID3D12DescriptorHeap *iface, REFIID iid, void **device) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(heap->device, iid, device); +} + +static D3D12_DESCRIPTOR_HEAP_DESC * STDMETHODCALLTYPE d3d12_descriptor_heap_GetDesc(ID3D12DescriptorHeap *iface, + D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + + TRACE("iface %p, desc %p.\n", iface, desc); + + *desc = heap->desc; + return desc; +} + +static D3D12_CPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_GetCPUDescriptorHandleForHeapStart( + ID3D12DescriptorHeap *iface, D3D12_CPU_DESCRIPTOR_HANDLE *descriptor) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + + TRACE("iface %p, descriptor %p.\n", iface, descriptor); + + descriptor->ptr = (SIZE_T)heap->descriptors; + + return descriptor; +} + +static D3D12_GPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_GetGPUDescriptorHandleForHeapStart( + ID3D12DescriptorHeap *iface, D3D12_GPU_DESCRIPTOR_HANDLE *descriptor) +{ + struct d3d12_descriptor_heap *heap = impl_from_ID3D12DescriptorHeap(iface); + + TRACE("iface %p, descriptor %p.\n", iface, descriptor); + + 
descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; + + return descriptor; +} + +static const struct ID3D12DescriptorHeapVtbl d3d12_descriptor_heap_vtbl = +{ + /* IUnknown methods */ + d3d12_descriptor_heap_QueryInterface, + d3d12_descriptor_heap_AddRef, + d3d12_descriptor_heap_Release, + /* ID3D12Object methods */ + d3d12_descriptor_heap_GetPrivateData, + d3d12_descriptor_heap_SetPrivateData, + d3d12_descriptor_heap_SetPrivateDataInterface, + d3d12_descriptor_heap_SetName, + /* ID3D12DeviceChild methods */ + d3d12_descriptor_heap_GetDevice, + /* ID3D12DescriptorHeap methods */ + d3d12_descriptor_heap_GetDesc, + d3d12_descriptor_heap_GetCPUDescriptorHandleForHeapStart, + d3d12_descriptor_heap_GetGPUDescriptorHandleForHeapStart, +}; + +static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + HRESULT hr; + + descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl; + descriptor_heap->refcount = 1; + + descriptor_heap->desc = *desc; + + if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) + return hr; + + d3d12_device_add_ref(descriptor_heap->device = device); + + return S_OK; +} + +HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, + const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap) +{ + size_t max_descriptor_count, descriptor_size; + struct d3d12_descriptor_heap *object; + HRESULT hr; + + if (!(descriptor_size = d3d12_device_get_descriptor_handle_increment_size(device, desc->Type))) + { + WARN("No descriptor size for descriptor type %#x.\n", desc->Type); + return E_INVALIDARG; + } + + if ((desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) + && (desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_RTV || desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_DSV)) + { + WARN("RTV/DSV descriptor heaps cannot be shader visible.\n"); + return E_INVALIDARG; + } + + 
max_descriptor_count = (~(size_t)0 - sizeof(*object)) / descriptor_size; + if (desc->NumDescriptors > max_descriptor_count) + { + WARN("Invalid descriptor count %u (max %zu).\n", desc->NumDescriptors, max_descriptor_count); + return E_OUTOFMEMORY; + } + + if (!(object = vkd3d_malloc(offsetof(struct d3d12_descriptor_heap, + descriptors[descriptor_size * desc->NumDescriptors])))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_descriptor_heap_init(object, device, desc))) + { + vkd3d_free(object); + return hr; + } + + memset(object->descriptors, 0, descriptor_size * desc->NumDescriptors); + + if ((desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || desc->Type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) + && device->vk_info.EXT_descriptor_indexing && !vkd3d_gpu_descriptor_allocator_register_range( + &device->gpu_descriptor_allocator, (struct d3d12_desc *)object->descriptors, desc->NumDescriptors)) + ERR("Failed to register descriptor range.\n"); + + TRACE("Created descriptor heap %p.\n", object); + + *descriptor_heap = object; + + return S_OK; +} + +/* ID3D12QueryHeap */ +static inline struct d3d12_query_heap *impl_from_ID3D12QueryHeap(ID3D12QueryHeap *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_query_heap, ID3D12QueryHeap_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_query_heap_QueryInterface(ID3D12QueryHeap *iface, + REFIID iid, void **out) +{ + TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); + + if (IsEqualGUID(iid, &IID_ID3D12QueryHeap) + || IsEqualGUID(iid, &IID_ID3D12Pageable) + || IsEqualGUID(iid, &IID_ID3D12DeviceChild) + || IsEqualGUID(iid, &IID_ID3D12Object) + || IsEqualGUID(iid, &IID_IUnknown)) + { + ID3D12QueryHeap_AddRef(iface); + *out = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); + + *out = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_query_heap_AddRef(ID3D12QueryHeap *iface) +{ + struct d3d12_query_heap *heap = 
impl_from_ID3D12QueryHeap(iface); + ULONG refcount = InterlockedIncrement(&heap->refcount); + + TRACE("%p increasing refcount to %u.\n", heap, refcount); + + return refcount; +} + +static ULONG STDMETHODCALLTYPE d3d12_query_heap_Release(ID3D12QueryHeap *iface) +{ + struct d3d12_query_heap *heap = impl_from_ID3D12QueryHeap(iface); + ULONG refcount = InterlockedDecrement(&heap->refcount); + + TRACE("%p decreasing refcount to %u.\n", heap, refcount); + + if (!refcount) + { + struct d3d12_device *device = heap->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + vkd3d_private_store_destroy(&heap->private_store); + + VK_CALL(vkDestroyQueryPool(device->vk_device, heap->vk_query_pool, NULL)); + + vkd3d_free(heap); + + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_query_heap_GetPrivateData(ID3D12QueryHeap *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_query_heap *heap = impl_from_ID3D12QueryHeap(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&heap->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_query_heap_SetPrivateData(ID3D12QueryHeap *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_query_heap *heap = impl_from_ID3D12QueryHeap(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&heap->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_query_heap_SetPrivateDataInterface(ID3D12QueryHeap *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_query_heap *heap = impl_from_ID3D12QueryHeap(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&heap->private_store, guid, data); +} + +static HRESULT 
STDMETHODCALLTYPE d3d12_query_heap_SetName(ID3D12QueryHeap *iface, const WCHAR *name) +{ + struct d3d12_query_heap *heap = impl_from_ID3D12QueryHeap(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, heap->device->wchar_size)); + + return vkd3d_set_vk_object_name(heap->device, (uint64_t)heap->vk_query_pool, + VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT, name); +} + +static HRESULT STDMETHODCALLTYPE d3d12_query_heap_GetDevice(ID3D12QueryHeap *iface, REFIID iid, void **device) +{ + struct d3d12_query_heap *heap = impl_from_ID3D12QueryHeap(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(heap->device, iid, device); +} + +static const struct ID3D12QueryHeapVtbl d3d12_query_heap_vtbl = +{ + /* IUnknown methods */ + d3d12_query_heap_QueryInterface, + d3d12_query_heap_AddRef, + d3d12_query_heap_Release, + /* ID3D12Object methods */ + d3d12_query_heap_GetPrivateData, + d3d12_query_heap_SetPrivateData, + d3d12_query_heap_SetPrivateDataInterface, + d3d12_query_heap_SetName, + /* ID3D12DeviceChild methods */ + d3d12_query_heap_GetDevice, +}; + +struct d3d12_query_heap *unsafe_impl_from_ID3D12QueryHeap(ID3D12QueryHeap *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_query_heap_vtbl); + return impl_from_ID3D12QueryHeap(iface); +} + +HRESULT d3d12_query_heap_create(struct d3d12_device *device, const D3D12_QUERY_HEAP_DESC *desc, + struct d3d12_query_heap **heap) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct d3d12_query_heap *object; + VkQueryPoolCreateInfo pool_info; + unsigned int element_count; + VkResult vr; + HRESULT hr; + + element_count = DIV_ROUND_UP(desc->Count, sizeof(*object->availability_mask) * CHAR_BIT); + if (!(object = vkd3d_malloc(offsetof(struct d3d12_query_heap, availability_mask[element_count])))) + return E_OUTOFMEMORY; + + object->ID3D12QueryHeap_iface.lpVtbl = &d3d12_query_heap_vtbl; + object->refcount = 1; + 
object->device = device; + memset(object->availability_mask, 0, element_count * sizeof(*object->availability_mask)); + + pool_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + pool_info.pNext = NULL; + pool_info.flags = 0; + pool_info.queryCount = desc->Count; + + switch (desc->Type) + { + case D3D12_QUERY_HEAP_TYPE_OCCLUSION: + pool_info.queryType = VK_QUERY_TYPE_OCCLUSION; + pool_info.pipelineStatistics = 0; + break; + + case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: + pool_info.queryType = VK_QUERY_TYPE_TIMESTAMP; + pool_info.pipelineStatistics = 0; + break; + + case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: + pool_info.queryType = VK_QUERY_TYPE_PIPELINE_STATISTICS; + pool_info.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT + | VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT + | VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT + | VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT + | VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT + | VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT + | VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT + | VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT + | VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT + | VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT + | VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT; + break; + + case D3D12_QUERY_HEAP_TYPE_SO_STATISTICS: + if (!device->vk_info.transform_feedback_queries) + { + FIXME("Transform feedback queries are not supported by Vulkan implementation.\n"); + vkd3d_free(object); + return E_NOTIMPL; + } + + pool_info.queryType = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT; + pool_info.pipelineStatistics = 0; + break; + + default: + WARN("Invalid query heap type %u.\n", desc->Type); + vkd3d_free(object); + return E_INVALIDARG; + } + + if (FAILED(hr = vkd3d_private_store_init(&object->private_store))) + { + vkd3d_free(object); + return hr; + } + + 
if ((vr = VK_CALL(vkCreateQueryPool(device->vk_device, &pool_info, NULL, &object->vk_query_pool))) < 0) + { + WARN("Failed to create Vulkan query pool, vr %d.\n", vr); + vkd3d_private_store_destroy(&object->private_store); + vkd3d_free(object); + return hresult_from_vk_result(vr); + } + + d3d12_device_add_ref(device); + + TRACE("Created query heap %p.\n", object); + + *heap = object; + + return S_OK; +} + +static HRESULT vkd3d_init_null_resources_data(struct vkd3d_null_resources *null_resource, + struct d3d12_device *device) +{ + const bool use_sparse_resources = device->vk_info.sparse_properties.residencyNonResidentStrict; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + static const VkClearColorValue clear_color = {{0}}; + VkCommandBufferAllocateInfo command_buffer_info; + VkCommandPool vk_command_pool = VK_NULL_HANDLE; + VkCommandPoolCreateInfo command_pool_info; + VkDevice vk_device = device->vk_device; + VkCommandBufferBeginInfo begin_info; + VkCommandBuffer vk_command_buffer; + VkFence vk_fence = VK_NULL_HANDLE; + VkImageSubresourceRange range; + VkImageMemoryBarrier barrier; + VkFenceCreateInfo fence_info; + struct vkd3d_queue *queue; + VkSubmitInfo submit_info; + VkQueue vk_queue; + VkResult vr; + + queue = d3d12_device_get_vkd3d_queue(device, D3D12_COMMAND_LIST_TYPE_DIRECT); + + command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_info.pNext = NULL; + command_pool_info.flags = 0; + command_pool_info.queueFamilyIndex = queue->vk_family_index; + + if ((vr = VK_CALL(vkCreateCommandPool(vk_device, &command_pool_info, NULL, &vk_command_pool))) < 0) + { + WARN("Failed to create Vulkan command pool, vr %d.\n", vr); + goto done; + } + + command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + command_buffer_info.pNext = NULL; + command_buffer_info.commandPool = vk_command_pool; + command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + command_buffer_info.commandBufferCount = 1; + + if 
((vr = VK_CALL(vkAllocateCommandBuffers(vk_device, &command_buffer_info, &vk_command_buffer))) < 0) + { + WARN("Failed to allocate Vulkan command buffer, vr %d.\n", vr); + goto done; + } + + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.pNext = NULL; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + begin_info.pInheritanceInfo = NULL; + + if ((vr = VK_CALL(vkBeginCommandBuffer(vk_command_buffer, &begin_info))) < 0) + { + WARN("Failed to begin command buffer, vr %d.\n", vr); + goto done; + } + + /* fill buffer */ + VK_CALL(vkCmdFillBuffer(vk_command_buffer, null_resource->vk_buffer, 0, VK_WHOLE_SIZE, 0x00000000)); + + if (use_sparse_resources) + { + /* transition 2D UAV image */ + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = NULL; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 0; + barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = null_resource->vk_2d_storage_image; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + + VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, + 0, NULL, 0, NULL, 1, &barrier)); + } + else + { + /* fill UAV buffer */ + VK_CALL(vkCmdFillBuffer(vk_command_buffer, + null_resource->vk_storage_buffer, 0, VK_WHOLE_SIZE, 0x00000000)); + + /* clear 2D UAV image */ + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = NULL; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + 
barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = null_resource->vk_2d_storage_image; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + + VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + 0, NULL, 0, NULL, 1, &barrier)); + + range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + range.baseMipLevel = 0; + range.levelCount = 1; + range.baseArrayLayer = 0; + range.layerCount = 1; + + VK_CALL(vkCmdClearColorImage(vk_command_buffer, + null_resource->vk_2d_storage_image, VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range)); + } + + /* transition 2D SRV image */ + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = NULL; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 0; + barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = null_resource->vk_2d_image; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + + VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, + 0, NULL, 0, NULL, 1, &barrier)); + + if ((vr = VK_CALL(vkEndCommandBuffer(vk_command_buffer))) < 0) + { + WARN("Failed to end command buffer, vr %d.\n", vr); + goto 
done; + } + + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.pNext = NULL; + fence_info.flags = 0; + + if ((vr = VK_CALL(vkCreateFence(device->vk_device, &fence_info, NULL, &vk_fence))) < 0) + { + WARN("Failed to create Vulkan fence, vr %d.\n", vr); + goto done; + } + + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = NULL; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = NULL; + submit_info.pWaitDstStageMask = NULL; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &vk_command_buffer; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = NULL; + + if (!(vk_queue = vkd3d_queue_acquire(queue))) + { + WARN("Failed to acquire queue %p.\n", queue); + goto done; + } + + if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) < 0) + ERR("Failed to submit, vr %d.\n", vr); + + vkd3d_queue_release(queue); + + vr = VK_CALL(vkWaitForFences(device->vk_device, 1, &vk_fence, VK_FALSE, ~(uint64_t)0)); + if (vr != VK_SUCCESS) + WARN("Failed to wait for fence, vr %d.\n", vr); + +done: + VK_CALL(vkDestroyCommandPool(vk_device, vk_command_pool, NULL)); + VK_CALL(vkDestroyFence(vk_device, vk_fence, NULL)); + + return hresult_from_vk_result(vr); +} + +HRESULT vkd3d_init_null_resources(struct vkd3d_null_resources *null_resources, + struct d3d12_device *device) +{ + const bool use_sparse_resources = device->vk_info.sparse_properties.residencyNonResidentStrict; + D3D12_HEAP_PROPERTIES heap_properties; + D3D12_RESOURCE_DESC resource_desc; + HRESULT hr; + + TRACE("Creating resources for NULL views.\n"); + + memset(null_resources, 0, sizeof(*null_resources)); + + memset(&heap_properties, 0, sizeof(heap_properties)); + heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; + + /* buffer */ + resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resource_desc.Alignment = 0; + resource_desc.Width = VKD3D_NULL_BUFFER_SIZE; + resource_desc.Height = 1; + 
resource_desc.DepthOrArraySize = 1; + resource_desc.MipLevels = 1; + resource_desc.Format = DXGI_FORMAT_UNKNOWN; + resource_desc.SampleDesc.Count = 1; + resource_desc.SampleDesc.Quality = 0; + resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; + + if (FAILED(hr = vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_NONE, + &resource_desc, &null_resources->vk_buffer))) + goto fail; + if (FAILED(hr = vkd3d_allocate_buffer_memory(device, null_resources->vk_buffer, + &heap_properties, D3D12_HEAP_FLAG_NONE, &null_resources->vk_buffer_memory, NULL, NULL))) + goto fail; + + /* buffer UAV */ + resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + if (FAILED(hr = vkd3d_create_buffer(device, use_sparse_resources ? NULL : &heap_properties, D3D12_HEAP_FLAG_NONE, + &resource_desc, &null_resources->vk_storage_buffer))) + goto fail; + if (!use_sparse_resources && FAILED(hr = vkd3d_allocate_buffer_memory(device, null_resources->vk_storage_buffer, + &heap_properties, D3D12_HEAP_FLAG_NONE, &null_resources->vk_storage_buffer_memory, NULL, NULL))) + goto fail; + + /* 2D SRV */ + resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resource_desc.Alignment = 0; + resource_desc.Width = 1; + resource_desc.Height = 1; + resource_desc.DepthOrArraySize = 1; + resource_desc.MipLevels = 1; + resource_desc.Format = VKD3D_NULL_VIEW_FORMAT; + resource_desc.SampleDesc.Count = 1; + resource_desc.SampleDesc.Quality = 0; + resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; + + if (FAILED(hr = vkd3d_create_image(device, &heap_properties, D3D12_HEAP_FLAG_NONE, + &resource_desc, NULL, &null_resources->vk_2d_image))) + goto fail; + if (FAILED(hr = vkd3d_allocate_image_memory(device, null_resources->vk_2d_image, + &heap_properties, D3D12_HEAP_FLAG_NONE, &null_resources->vk_2d_image_memory, NULL, NULL))) + goto fail; + + /* 2D UAV */ + resource_desc.Dimension = 
D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resource_desc.Alignment = 0; + resource_desc.Width = 1; + resource_desc.Height = 1; + resource_desc.DepthOrArraySize = 1; + resource_desc.MipLevels = 1; + resource_desc.Format = VKD3D_NULL_VIEW_FORMAT; + resource_desc.SampleDesc.Count = 1; + resource_desc.SampleDesc.Quality = 0; + resource_desc.Layout = use_sparse_resources + ? D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE : D3D12_TEXTURE_LAYOUT_UNKNOWN; + resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + if (FAILED(hr = vkd3d_create_image(device, use_sparse_resources ? NULL : &heap_properties, D3D12_HEAP_FLAG_NONE, + &resource_desc, NULL, &null_resources->vk_2d_storage_image))) + goto fail; + if (!use_sparse_resources && FAILED(hr = vkd3d_allocate_image_memory(device, null_resources->vk_2d_storage_image, + &heap_properties, D3D12_HEAP_FLAG_NONE, &null_resources->vk_2d_storage_image_memory, NULL, NULL))) + goto fail; + + /* set Vulkan object names */ + vkd3d_set_vk_object_name_utf8(device, (uint64_t)null_resources->vk_buffer, + VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, "NULL buffer"); + vkd3d_set_vk_object_name_utf8(device, (uint64_t)null_resources->vk_buffer_memory, + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, "NULL memory"); + vkd3d_set_vk_object_name_utf8(device, (uint64_t)null_resources->vk_storage_buffer, + VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, "NULL UAV buffer"); + vkd3d_set_vk_object_name_utf8(device, (uint64_t)null_resources->vk_2d_image, + VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, "NULL 2D SRV image"); + vkd3d_set_vk_object_name_utf8(device, (uint64_t)null_resources->vk_2d_image_memory, + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, "NULL 2D SRV memory"); + vkd3d_set_vk_object_name_utf8(device, (uint64_t)null_resources->vk_2d_storage_image, + VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, "NULL 2D UAV image"); + if (!use_sparse_resources) + { + vkd3d_set_vk_object_name_utf8(device, (uint64_t)null_resources->vk_storage_buffer_memory, + 
VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, "NULL UAV buffer memory"); + vkd3d_set_vk_object_name_utf8(device, (uint64_t)null_resources->vk_2d_storage_image_memory, + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, "NULL 2D UAV memory"); + } + + return vkd3d_init_null_resources_data(null_resources, device); + +fail: + ERR("Failed to initialize NULL resources, hr %#x.\n", hr); + vkd3d_destroy_null_resources(null_resources, device); + return hr; +} + +void vkd3d_destroy_null_resources(struct vkd3d_null_resources *null_resources, + struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + VK_CALL(vkDestroyBuffer(device->vk_device, null_resources->vk_buffer, NULL)); + VK_CALL(vkFreeMemory(device->vk_device, null_resources->vk_buffer_memory, NULL)); + + VK_CALL(vkDestroyBuffer(device->vk_device, null_resources->vk_storage_buffer, NULL)); + VK_CALL(vkFreeMemory(device->vk_device, null_resources->vk_storage_buffer_memory, NULL)); + + VK_CALL(vkDestroyImage(device->vk_device, null_resources->vk_2d_image, NULL)); + VK_CALL(vkFreeMemory(device->vk_device, null_resources->vk_2d_image_memory, NULL)); + + VK_CALL(vkDestroyImage(device->vk_device, null_resources->vk_2d_storage_image, NULL)); + VK_CALL(vkFreeMemory(device->vk_device, null_resources->vk_2d_storage_image_memory, NULL)); + + memset(null_resources, 0, sizeof(*null_resources)); +} diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c new file mode 100644 index 00000000000..2d3954d29ae --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -0,0 +1,3572 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * Copyright 2016 Henri Verbeet for CodeWeavers + * Copyright 2021 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any 
later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_private.h" +#include "vkd3d_shaders.h" + +/* ID3D12RootSignature */ +static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_root_signature, ID3D12RootSignature_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_root_signature_QueryInterface(ID3D12RootSignature *iface, + REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + if (IsEqualGUID(riid, &IID_ID3D12RootSignature) + || IsEqualGUID(riid, &IID_ID3D12DeviceChild) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { + ID3D12RootSignature_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_root_signature_AddRef(ID3D12RootSignature *iface) +{ + struct d3d12_root_signature *root_signature = impl_from_ID3D12RootSignature(iface); + ULONG refcount = InterlockedIncrement(&root_signature->refcount); + + TRACE("%p increasing refcount to %u.\n", root_signature, refcount); + + return refcount; +} + +static void d3d12_descriptor_set_layout_cleanup( + struct d3d12_descriptor_set_layout *layout, struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, 
layout->vk_layout, NULL)); +} + +static void d3d12_root_signature_cleanup(struct d3d12_root_signature *root_signature, + struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + unsigned int i; + + if (root_signature->vk_pipeline_layout) + VK_CALL(vkDestroyPipelineLayout(device->vk_device, root_signature->vk_pipeline_layout, NULL)); + for (i = 0; i < root_signature->vk_set_count; ++i) + { + d3d12_descriptor_set_layout_cleanup(&root_signature->descriptor_set_layouts[i], device); + } + + if (root_signature->parameters) + { + for (i = 0; i < root_signature->parameter_count; ++i) + { + if (root_signature->parameters[i].parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + vkd3d_free(root_signature->parameters[i].u.descriptor_table.ranges); + } + vkd3d_free(root_signature->parameters); + } + + if (root_signature->descriptor_mapping) + vkd3d_free(root_signature->descriptor_mapping); + vkd3d_free(root_signature->descriptor_offsets); + if (root_signature->root_constants) + vkd3d_free(root_signature->root_constants); + + for (i = 0; i < root_signature->static_sampler_count; ++i) + { + if (root_signature->static_samplers[i]) + VK_CALL(vkDestroySampler(device->vk_device, root_signature->static_samplers[i], NULL)); + } + if (root_signature->static_samplers) + vkd3d_free(root_signature->static_samplers); +} + +static ULONG STDMETHODCALLTYPE d3d12_root_signature_Release(ID3D12RootSignature *iface) +{ + struct d3d12_root_signature *root_signature = impl_from_ID3D12RootSignature(iface); + ULONG refcount = InterlockedDecrement(&root_signature->refcount); + + TRACE("%p decreasing refcount to %u.\n", root_signature, refcount); + + if (!refcount) + { + struct d3d12_device *device = root_signature->device; + vkd3d_private_store_destroy(&root_signature->private_store); + d3d12_root_signature_cleanup(root_signature, device); + vkd3d_free(root_signature); + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT 
STDMETHODCALLTYPE d3d12_root_signature_GetPrivateData(ID3D12RootSignature *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_root_signature *root_signature = impl_from_ID3D12RootSignature(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&root_signature->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_root_signature_SetPrivateData(ID3D12RootSignature *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_root_signature *root_signature = impl_from_ID3D12RootSignature(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&root_signature->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_root_signature_SetPrivateDataInterface(ID3D12RootSignature *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_root_signature *root_signature = impl_from_ID3D12RootSignature(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&root_signature->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_root_signature_SetName(ID3D12RootSignature *iface, const WCHAR *name) +{ + struct d3d12_root_signature *root_signature = impl_from_ID3D12RootSignature(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, root_signature->device->wchar_size)); + + return name ? 
S_OK : E_INVALIDARG; +} + +static HRESULT STDMETHODCALLTYPE d3d12_root_signature_GetDevice(ID3D12RootSignature *iface, + REFIID iid, void **device) +{ + struct d3d12_root_signature *root_signature = impl_from_ID3D12RootSignature(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(root_signature->device, iid, device); +} + +static const struct ID3D12RootSignatureVtbl d3d12_root_signature_vtbl = +{ + /* IUnknown methods */ + d3d12_root_signature_QueryInterface, + d3d12_root_signature_AddRef, + d3d12_root_signature_Release, + /* ID3D12Object methods */ + d3d12_root_signature_GetPrivateData, + d3d12_root_signature_SetPrivateData, + d3d12_root_signature_SetPrivateDataInterface, + d3d12_root_signature_SetName, + /* ID3D12DeviceChild methods */ + d3d12_root_signature_GetDevice, +}; + +struct d3d12_root_signature *unsafe_impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_root_signature_vtbl); + return impl_from_ID3D12RootSignature(iface); +} + +static VkShaderStageFlags stage_flags_from_visibility(D3D12_SHADER_VISIBILITY visibility) +{ + switch (visibility) + { + case D3D12_SHADER_VISIBILITY_ALL: + return VK_SHADER_STAGE_ALL; + case D3D12_SHADER_VISIBILITY_VERTEX: + return VK_SHADER_STAGE_VERTEX_BIT; + case D3D12_SHADER_VISIBILITY_HULL: + return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + case D3D12_SHADER_VISIBILITY_DOMAIN: + return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + case D3D12_SHADER_VISIBILITY_GEOMETRY: + return VK_SHADER_STAGE_GEOMETRY_BIT; + case D3D12_SHADER_VISIBILITY_PIXEL: + return VK_SHADER_STAGE_FRAGMENT_BIT; + default: + return 0; + } +} + +static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VISIBILITY visibility) +{ + switch (visibility) + { + case D3D12_SHADER_VISIBILITY_ALL: + return VKD3D_SHADER_VISIBILITY_ALL; + case D3D12_SHADER_VISIBILITY_VERTEX: + return 
VKD3D_SHADER_VISIBILITY_VERTEX; + case D3D12_SHADER_VISIBILITY_HULL: + return VKD3D_SHADER_VISIBILITY_HULL; + case D3D12_SHADER_VISIBILITY_DOMAIN: + return VKD3D_SHADER_VISIBILITY_DOMAIN; + case D3D12_SHADER_VISIBILITY_GEOMETRY: + return VKD3D_SHADER_VISIBILITY_GEOMETRY; + case D3D12_SHADER_VISIBILITY_PIXEL: + return VKD3D_SHADER_VISIBILITY_PIXEL; + default: + FIXME("Unhandled visibility %#x.\n", visibility); + return VKD3D_SHADER_VISIBILITY_ALL; + } +} + +static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type, + bool is_buffer) +{ + switch (type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + return is_buffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + return VK_DESCRIPTOR_TYPE_SAMPLER; + default: + FIXME("Unhandled descriptor range type type %#x.\n", type); + return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + } +} + +static VkDescriptorType vk_descriptor_type_from_d3d12_root_parameter(D3D12_ROOT_PARAMETER_TYPE type) +{ + switch (type) + { + /* SRV and UAV root parameters are buffer views. 
*/ + case D3D12_ROOT_PARAMETER_TYPE_SRV: + return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + case D3D12_ROOT_PARAMETER_TYPE_UAV: + return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + case D3D12_ROOT_PARAMETER_TYPE_CBV: + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + default: + FIXME("Unhandled descriptor root parameter type %#x.\n", type); + return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + } +} + +static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( + D3D12_DESCRIPTOR_RANGE_TYPE type) +{ + switch (type) + { + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + return VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + return VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + return VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: + return VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; + default: + FIXME("Unhandled descriptor range type type %#x.\n", type); + return VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; + } +} + +static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_root_parameter_type( + D3D12_ROOT_PARAMETER_TYPE type) +{ + switch (type) + { + case D3D12_ROOT_PARAMETER_TYPE_SRV: + return VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; + case D3D12_ROOT_PARAMETER_TYPE_UAV: + return VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + case D3D12_ROOT_PARAMETER_TYPE_CBV: + return VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + default: + FIXME("Unhandled descriptor root parameter type %#x.\n", type); + return VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; + } +} + +static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutBinding *binding_desc, + enum vkd3d_shader_descriptor_type descriptor_type, D3D12_SHADER_VISIBILITY shader_visibility, + bool is_buffer, uint32_t vk_binding, unsigned int descriptor_count) +{ + binding_desc->binding = vk_binding; + binding_desc->descriptorType + = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, is_buffer); + binding_desc->descriptorCount = descriptor_count; + 
binding_desc->stageFlags = stage_flags_from_visibility(shader_visibility); + binding_desc->pImmutableSamplers = NULL; + + return true; +} + +struct d3d12_root_signature_info +{ + size_t binding_count; + + size_t root_constant_count; + size_t root_descriptor_count; + + unsigned int cbv_count; + unsigned int srv_count; + unsigned int uav_count; + unsigned int sampler_count; + unsigned int cbv_unbounded_range_count; + unsigned int srv_unbounded_range_count; + unsigned int uav_unbounded_range_count; + unsigned int sampler_unbounded_range_count; + + size_t cost; +}; + +static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info, + const D3D12_ROOT_DESCRIPTOR_TABLE *table, bool use_array) +{ + bool cbv_unbounded_range = false, srv_unbounded_range = false, uav_unbounded_range = false; + bool sampler_unbounded_range = false; + bool unbounded = false; + unsigned int i, count; + + for (i = 0; i < table->NumDescriptorRanges; ++i) + { + const D3D12_DESCRIPTOR_RANGE *range = &table->pDescriptorRanges[i]; + unsigned int binding_count; + + if (!range->NumDescriptors) + { + WARN("A descriptor range is empty.\n"); + return E_INVALIDARG; + } + + if (range->NumDescriptors != UINT_MAX && !vkd3d_bound_range(range->BaseShaderRegister, + range->NumDescriptors, UINT_MAX)) + { + WARN("A descriptor range overflows.\n"); + return E_INVALIDARG; + } + + if (unbounded && range->OffsetInDescriptorsFromTableStart == D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + { + WARN("An unbounded range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " + "another unbounded range.\n"); + return E_INVALIDARG; + } + + count = range->NumDescriptors; + if (range->NumDescriptors == UINT_MAX) + { + unbounded = true; + count = 0; + } + + binding_count = use_array ? 1 : range->NumDescriptors; + + switch (range->RangeType) + { + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + /* XXX: Vulkan buffer and image descriptors have different types. 
In order + * to preserve compatibility between Vulkan resource bindings for the same + * root signature, we create descriptor set layouts with two bindings for + * each SRV and UAV. */ + info->binding_count += binding_count; + info->srv_count += count * 2u; + srv_unbounded_range |= unbounded; + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + /* As above. */ + info->binding_count += binding_count; + info->uav_count += count * 2u; + uav_unbounded_range |= unbounded; + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + info->cbv_count += count; + cbv_unbounded_range |= unbounded; + break; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: + info->sampler_count += count; + sampler_unbounded_range |= unbounded; + break; + default: + FIXME("Unhandled descriptor type %#x.\n", range->RangeType); + return E_NOTIMPL; + } + + info->binding_count += binding_count; + } + + if (unbounded && !use_array) + { + FIXME("The device does not support unbounded descriptor ranges.\n"); + return E_FAIL; + } + + info->srv_unbounded_range_count += srv_unbounded_range * 2u; + info->uav_unbounded_range_count += uav_unbounded_range * 2u; + info->cbv_unbounded_range_count += cbv_unbounded_range; + info->sampler_unbounded_range_count += sampler_unbounded_range; + + return S_OK; +} + +static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_info *info, + const D3D12_ROOT_SIGNATURE_DESC *desc, bool use_array) +{ + unsigned int i; + HRESULT hr; + + memset(info, 0, sizeof(*info)); + + for (i = 0; i < desc->NumParameters; ++i) + { + const D3D12_ROOT_PARAMETER *p = &desc->pParameters[i]; + + switch (p->ParameterType) + { + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + if (FAILED(hr = d3d12_root_signature_info_count_descriptors(info, + &p->u.DescriptorTable, use_array))) + return hr; + ++info->cost; + break; + + case D3D12_ROOT_PARAMETER_TYPE_CBV: + ++info->root_descriptor_count; + ++info->cbv_count; + ++info->binding_count; + info->cost += 2; + break; + case 
D3D12_ROOT_PARAMETER_TYPE_SRV: + ++info->root_descriptor_count; + ++info->srv_count; + ++info->binding_count; + info->cost += 2; + break; + case D3D12_ROOT_PARAMETER_TYPE_UAV: + ++info->root_descriptor_count; + ++info->uav_count; + ++info->binding_count; + info->cost += 2; + break; + + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + ++info->root_constant_count; + info->cost += p->u.Constants.Num32BitValues; + break; + + default: + FIXME("Unhandled type %#x for parameter %u.\n", p->ParameterType, i); + return E_NOTIMPL; + } + } + + info->binding_count += desc->NumStaticSamplers; + info->sampler_count += desc->NumStaticSamplers; + + return S_OK; +} + +static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signature *root_signature, + const D3D12_ROOT_SIGNATURE_DESC *desc, + struct VkPushConstantRange push_constants[D3D12_SHADER_VISIBILITY_PIXEL + 1], + uint32_t *push_constant_range_count) +{ + uint32_t push_constants_offset[D3D12_SHADER_VISIBILITY_PIXEL + 1]; + unsigned int i, j, push_constant_count; + uint32_t offset; + + memset(push_constants, 0, (D3D12_SHADER_VISIBILITY_PIXEL + 1) * sizeof(*push_constants)); + memset(push_constants_offset, 0, sizeof(push_constants_offset)); + for (i = 0; i < desc->NumParameters; ++i) + { + const D3D12_ROOT_PARAMETER *p = &desc->pParameters[i]; + if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) + continue; + + assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); + push_constants[p->ShaderVisibility].stageFlags = stage_flags_from_visibility(p->ShaderVisibility); + push_constants[p->ShaderVisibility].size += p->u.Constants.Num32BitValues * sizeof(uint32_t); + } + if (push_constants[D3D12_SHADER_VISIBILITY_ALL].size) + { + /* When D3D12_SHADER_VISIBILITY_ALL is used we use a single push + * constants range because the Vulkan spec states: + * + * "Any two elements of pPushConstantRanges must not include the same + * stage in stageFlags". 
+ */ + push_constant_count = 1; + for (i = 0; i <= D3D12_SHADER_VISIBILITY_PIXEL; ++i) + { + if (i == D3D12_SHADER_VISIBILITY_ALL) + continue; + + push_constants[D3D12_SHADER_VISIBILITY_ALL].size += push_constants[i].size; + push_constants[i].size = 0; + } + } + else + { + /* Move non-empty push constants ranges to front and compute offsets. */ + offset = 0; + for (i = 0, j = 0; i <= D3D12_SHADER_VISIBILITY_PIXEL; ++i) + { + if (push_constants[i].size) + { + push_constants[j] = push_constants[i]; + push_constants[j].offset = offset; + push_constants_offset[i] = offset; + offset += push_constants[j].size; + ++j; + } + } + push_constant_count = j; + } + + for (i = 0, j = 0; i < desc->NumParameters; ++i) + { + struct d3d12_root_constant *root_constant = &root_signature->parameters[i].u.constant; + const D3D12_ROOT_PARAMETER *p = &desc->pParameters[i]; + unsigned int idx; + + if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) + continue; + + idx = push_constant_count == 1 ? 0 : p->ShaderVisibility; + offset = push_constants_offset[idx]; + push_constants_offset[idx] += p->u.Constants.Num32BitValues * sizeof(uint32_t); + + root_signature->parameters[i].parameter_type = p->ParameterType; + root_constant->stage_flags = push_constant_count == 1 + ? 
push_constants[0].stageFlags : stage_flags_from_visibility(p->ShaderVisibility); + root_constant->offset = offset; + + root_signature->root_constants[j].register_space = p->u.Constants.RegisterSpace; + root_signature->root_constants[j].register_index = p->u.Constants.ShaderRegister; + root_signature->root_constants[j].shader_visibility + = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility); + root_signature->root_constants[j].offset = offset; + root_signature->root_constants[j].size = p->u.Constants.Num32BitValues * sizeof(uint32_t); + + ++j; + } + + *push_constant_range_count = push_constant_count; + + return S_OK; +} + +struct vkd3d_descriptor_set_context +{ + VkDescriptorSetLayoutBinding *current_binding; + VkDescriptorSetLayoutBinding *first_binding; + unsigned int table_index; + unsigned int unbounded_offset; + unsigned int descriptor_index; + uint32_t descriptor_binding; +}; + +static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, unsigned int set_count) +{ + uint32_t max_count = min(VKD3D_MAX_DESCRIPTOR_SETS, device->vk_info.device_limits.maxBoundDescriptorSets); + + if (set_count > max_count) + { + ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); + return false; + } + + return true; +} + +static HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device, + VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, bool unbounded, + const VkDescriptorSetLayoutBinding *bindings, VkDescriptorSetLayout *set_layout); + +static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_root_signature *root_signature, + struct vkd3d_descriptor_set_context *context, VkDescriptorSetLayoutCreateFlags flags) +{ + struct d3d12_descriptor_set_layout *layout; + unsigned int index; + HRESULT hr; + + if (!context->descriptor_binding) + return S_OK; + + index = root_signature->vk_set_count; + layout = &root_signature->descriptor_set_layouts[index]; + + if 
(!vkd3d_validate_descriptor_set_count(root_signature->device, index + 1)) + return E_INVALIDARG; + + if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, flags, context->descriptor_binding, + context->unbounded_offset != UINT_MAX, context->first_binding, &layout->vk_layout))) + return hr; + layout->table_index = context->table_index; + layout->unbounded_offset = context->unbounded_offset; + ++root_signature->vk_set_count; + + context->current_binding = context->first_binding; + context->descriptor_binding = 0; + + return S_OK; +} + +static void d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, + enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx, + bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, + unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets + ? &root_signature->descriptor_offsets[context->descriptor_index] : NULL; + struct vkd3d_shader_resource_binding *mapping + = &root_signature->descriptor_mapping[context->descriptor_index++]; + + mapping->type = descriptor_type; + mapping->register_space = register_space; + mapping->register_index = register_idx; + mapping->shader_visibility = shader_visibility; + mapping->flags = buffer_descriptor ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + mapping->binding.set = root_signature->vk_set_count; + mapping->binding.binding = context->descriptor_binding++; + mapping->binding.count = descriptor_count; + if (offset) + { + offset->static_offset = 0; + offset->dynamic_offset_index = ~0u; + } + + if (context->unbounded_offset != UINT_MAX) + d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); +} + +static uint32_t d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, + enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, + unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, + enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) +{ + uint32_t first_binding; + unsigned int i; + + is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV + || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + && duplicate_descriptors; + + first_binding = context->descriptor_binding; + for (i = 0; i < binding_count; ++i) + { + if (duplicate_descriptors) + d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, + base_register_idx + i, true, shader_visibility, 1, context); + + d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, + base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context); + } + return first_binding; +} + +static uint32_t vkd3d_descriptor_magic_from_d3d12(D3D12_DESCRIPTOR_RANGE_TYPE type) +{ + switch (type) + { + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + return VKD3D_DESCRIPTOR_MAGIC_SRV; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + return VKD3D_DESCRIPTOR_MAGIC_UAV; + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + return VKD3D_DESCRIPTOR_MAGIC_CBV; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: 
+ return VKD3D_DESCRIPTOR_MAGIC_SAMPLER; + default: + ERR("Invalid range type %#x.\n", type); + return VKD3D_DESCRIPTOR_MAGIC_FREE; + } +} + +static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + const struct d3d12_root_signature_info *info, const struct vkd3d_device_descriptor_limits *limits) +{ + unsigned int count, limit; + + if (range->descriptor_count != UINT_MAX) + return range->descriptor_count; + + switch (range->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + limit = limits->uniform_buffer_max_descriptors; + count = (limit - min(info->cbv_count, limit)) / info->cbv_unbounded_range_count; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + limit = limits->sampled_image_max_descriptors; + count = (limit - min(info->srv_count, limit)) / info->srv_unbounded_range_count; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + limit = limits->storage_image_max_descriptors; + count = (limit - min(info->uav_count, limit)) / info->uav_unbounded_range_count; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + limit = limits->sampler_max_descriptors; + count = (limit - min(info->sampler_count, limit)) / info->sampler_unbounded_range_count; + break; + default: + ERR("Unhandled type %#x.\n", range->type); + return 1; + } + + if (!count) + { + WARN("Descriptor table exceeds type %#x limit of %u.\n", range->type, limit); + count = 1; + } + + return min(count, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); +} + +static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, + struct vkd3d_descriptor_set_context *context) +{ + enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); + bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + enum vkd3d_shader_descriptor_type descriptor_type = range->type; + + if 
(range->descriptor_count == UINT_MAX) + context->unbounded_offset = range->offset; + + if (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + { + if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, + descriptor_type, visibility, true, context->descriptor_binding, range->vk_binding_count)) + return E_NOTIMPL; + ++context->current_binding; + + d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, + range->base_register_idx, true, shader_visibility, range->vk_binding_count, context); + } + + if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, + descriptor_type, visibility, is_buffer, context->descriptor_binding, range->vk_binding_count)) + return E_NOTIMPL; + ++context->current_binding; + + d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, + range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context); + + context->unbounded_offset = UINT_MAX; + + return S_OK; +} + +static void d3d12_root_signature_map_vk_unbounded_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, unsigned int descriptor_offset, bool buffer_descriptor, + enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_resource_binding *mapping = &root_signature->descriptor_mapping[context->descriptor_index]; + struct vkd3d_shader_descriptor_offset *offset = &root_signature->descriptor_offsets[context->descriptor_index++]; + + mapping->type = range->type; + mapping->register_space = range->register_space; + mapping->register_index = range->base_register_idx; + mapping->shader_visibility = shader_visibility; + mapping->flags = buffer_descriptor ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + mapping->binding.set = root_signature->main_set + range->set + ((range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV + || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) && !buffer_descriptor); + mapping->binding.binding = range->binding; + mapping->binding.count = range->vk_binding_count; + offset->static_offset = descriptor_offset; + offset->dynamic_offset_index = ~0u; +} + +static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, unsigned int descriptor_offset, + enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) +{ + bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + d3d12_root_signature_map_vk_unbounded_binding(root_signature, range, + descriptor_offset, true, shader_visibility, context); + + d3d12_root_signature_map_vk_unbounded_binding(root_signature, range, + descriptor_offset, is_buffer, shader_visibility, context); +} + +static int compare_register_range(const void *a, const void *b) +{ + const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b; + int ret; + + if ((ret = vkd3d_u32_compare(range_a->type, range_b->type))) + return ret; + + if ((ret = vkd3d_u32_compare(range_a->register_space, range_b->register_space))) + return ret; + + return vkd3d_u32_compare(range_a->base_register_idx, range_b->base_register_idx); +} + +static int compare_descriptor_range(const void *a, const void *b) +{ + const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b; + int ret; + + if ((ret = vkd3d_u32_compare(range_a->type, range_b->type))) + return ret; + + if ((ret = vkd3d_u32_compare(range_a->offset, range_b->offset))) + return ret; + + return (range_a->descriptor_count == UINT_MAX) - 
(range_b->descriptor_count == UINT_MAX); +} + +static HRESULT validate_descriptor_register_ranges(const struct d3d12_root_descriptor_table_range *ranges, + unsigned int count) +{ + const struct d3d12_root_descriptor_table_range *range, *prev; + unsigned int i; + + for (i = 1; i < count; ++i) + { + range = &ranges[i]; + prev = &ranges[i - 1]; + + if (range->type == prev->type && range->register_space == prev->register_space + && range->base_register_idx - prev->base_register_idx < prev->descriptor_count) + return E_INVALIDARG; + } + + return S_OK; +} + +static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, + const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, + struct vkd3d_descriptor_set_context *context) +{ + const struct d3d12_device *device = root_signature->device; + struct d3d12_root_descriptor_table *table; + unsigned int i, j, k, range_count; + uint32_t vk_binding; + HRESULT hr; + + root_signature->descriptor_table_mask = 0; + + for (i = 0; i < desc->NumParameters; ++i) + { + const struct d3d12_root_descriptor_table_range *base_range = NULL; + const D3D12_ROOT_PARAMETER *p = &desc->pParameters[i]; + enum vkd3d_shader_visibility shader_visibility; + unsigned int offset = 0; + + if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + continue; + + root_signature->descriptor_table_mask |= 1ull << i; + + table = &root_signature->parameters[i].u.descriptor_table; + range_count = p->u.DescriptorTable.NumDescriptorRanges; + shader_visibility = vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility); + + root_signature->parameters[i].parameter_type = p->ParameterType; + table->range_count = range_count; + if (!(table->ranges = vkd3d_calloc(table->range_count, sizeof(*table->ranges)))) + return E_OUTOFMEMORY; + + context->table_index = i; + + for (j = 0; j < range_count; ++j) + { + const D3D12_DESCRIPTOR_RANGE *range = &p->u.DescriptorTable.pDescriptorRanges[j]; + + 
if (range->OffsetInDescriptorsFromTableStart != D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + offset = range->OffsetInDescriptorsFromTableStart; + + if (range->NumDescriptors != UINT_MAX && !vkd3d_bound_range(offset, range->NumDescriptors, UINT_MAX)) + return E_INVALIDARG; + + table->ranges[j].offset = offset; + table->ranges[j].descriptor_count = range->NumDescriptors; + table->ranges[j].type = vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType); + table->ranges[j].descriptor_magic = vkd3d_descriptor_magic_from_d3d12(range->RangeType); + table->ranges[j].register_space = range->RegisterSpace; + table->ranges[j].base_register_idx = range->BaseShaderRegister; + + TRACE("Descriptor table %u, range %u, offset %u, type %#x, count %u.\n", i, j, + offset, range->RangeType, range->NumDescriptors); + + /* If NumDescriptors == UINT_MAX, validation during counting ensures this offset is not used. */ + offset += range->NumDescriptors; + } + + qsort(table->ranges, range_count, sizeof(*table->ranges), compare_register_range); + if (FAILED(hr = validate_descriptor_register_ranges(table->ranges, range_count))) + return hr; + + qsort(table->ranges, range_count, sizeof(*table->ranges), compare_descriptor_range); + + for (j = 0; j < range_count; ++j) + { + struct d3d12_root_descriptor_table_range *range; + VkDescriptorSetLayoutBinding *cur_binding; + + range = &table->ranges[j]; + + range->set = root_signature->vk_set_count - root_signature->main_set; + + if (root_signature->use_descriptor_arrays) + { + if (j && range->type != table->ranges[j - 1].type) + base_range = NULL; + + /* Bounded and unbounded ranges can follow unbounded ones, + * so map them all into the first unbounded range. 
*/ + if (base_range) + { + unsigned int rel_offset = range->offset - base_range->offset; + + if (rel_offset >= base_range->vk_binding_count) + { + ERR("Available binding size of %u is insufficient for an offset of %u.\n", + base_range->vk_binding_count, rel_offset); + continue; + } + + range->set = base_range->set; + range->binding = base_range->binding; + range->vk_binding_count = base_range->vk_binding_count - rel_offset; + d3d12_root_signature_map_descriptor_unbounded_binding(root_signature, range, + rel_offset, shader_visibility, context); + continue; + } + else if (range->descriptor_count == UINT_MAX) + { + base_range = range; + } + + range->binding = context->descriptor_binding; + range->vk_binding_count = vk_binding_count_from_descriptor_range(range, + info, &device->vk_info.descriptor_limits); + + if (FAILED(hr = d3d12_root_signature_init_descriptor_array_binding(root_signature, + range, p->ShaderVisibility, context))) + return hr; + + continue; + } + + cur_binding = context->current_binding; + + vk_binding = d3d12_root_signature_assign_vk_bindings(root_signature, + range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, + shader_visibility, context); + + /* Unroll descriptor range. */ + for (k = 0; k < range->descriptor_count; ++k) + { + uint32_t vk_current_binding = vk_binding + k; + + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV + || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + { + vk_current_binding = vk_binding + 2 * k; + + /* Assign binding for image view. 
*/ + if (!vk_binding_from_d3d12_descriptor_range(cur_binding, + range->type, p->ShaderVisibility, false, vk_current_binding + 1, 1)) + return E_NOTIMPL; + + ++cur_binding; + } + + if (!vk_binding_from_d3d12_descriptor_range(cur_binding, + range->type, p->ShaderVisibility, true, vk_current_binding, 1)) + return E_NOTIMPL; + + ++cur_binding; + } + + table->ranges[j].vk_binding_count = table->ranges[j].descriptor_count; + table->ranges[j].binding = vk_binding; + + context->current_binding = cur_binding; + } + } + + return S_OK; +} + +static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, + const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) +{ + VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; + unsigned int i; + + root_signature->push_descriptor_mask = 0; + + for (i = 0; i < desc->NumParameters; ++i) + { + const D3D12_ROOT_PARAMETER *p = &desc->pParameters[i]; + if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_CBV + && p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_SRV + && p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_UAV) + continue; + + root_signature->push_descriptor_mask |= 1u << i; + + cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, + vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), + p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, + vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context); + cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); + cur_binding->descriptorCount = 1; + cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); + cur_binding->pImmutableSamplers = NULL; + + root_signature->parameters[i].parameter_type = p->ParameterType; + root_signature->parameters[i].u.descriptor.binding = cur_binding->binding; + + ++cur_binding; + } + + context->current_binding = cur_binding; + return S_OK; +} + 
+static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signature *root_signature, + struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, + struct vkd3d_descriptor_set_context *context) +{ + VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; + unsigned int i; + HRESULT hr; + + assert(root_signature->static_sampler_count == desc->NumStaticSamplers); + for (i = 0; i < desc->NumStaticSamplers; ++i) + { + const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; + + if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) + return hr; + + cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false, + vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context); + cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + cur_binding->descriptorCount = 1; + cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); + cur_binding->pImmutableSamplers = &root_signature->static_samplers[i]; + + ++cur_binding; + } + + context->current_binding = cur_binding; + return S_OK; +} + +static bool vk_binding_uses_partial_binding(const VkDescriptorSetLayoutBinding *binding) +{ + if (binding->descriptorCount == 1) + return false; + + switch (binding->descriptorType) + { + /* Types mapped in vk_descriptor_type_from_vkd3d_descriptor_type() from D3D12 SRV and UAV types, + * i.e. those which can be a buffer or an image. 
*/ + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return true; + default: + return false; + } +} + +static HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device, + VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, bool unbounded, + const VkDescriptorSetLayoutBinding *bindings, VkDescriptorSetLayout *set_layout) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags_info; + VkDescriptorBindingFlagsEXT *set_flags = NULL; + VkDescriptorSetLayoutCreateInfo set_desc; + VkResult vr; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + set_desc.pNext = NULL; + set_desc.flags = flags; + set_desc.bindingCount = binding_count; + set_desc.pBindings = bindings; + if (device->vk_info.EXT_descriptor_indexing) + { + unsigned int i; + + for (i = 0; i < binding_count; ++i) + if (unbounded || vk_binding_uses_partial_binding(&bindings[i])) + break; + + if (i < binding_count) + { + if (!(set_flags = vkd3d_malloc(binding_count * sizeof(*set_flags)))) + return E_OUTOFMEMORY; + + for (i = 0; i < binding_count; ++i) + set_flags[i] = vk_binding_uses_partial_binding(&bindings[i]) + ? 
VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT : 0; + + if (unbounded) + set_flags[binding_count - 1] = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT + | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT; + + flags_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; + flags_info.pNext = NULL; + flags_info.bindingCount = binding_count; + flags_info.pBindingFlags = set_flags; + + set_desc.pNext = &flags_info; + } + } + + vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, &set_desc, NULL, set_layout)); + vkd3d_free(set_flags); + if (vr < 0) + { + WARN("Failed to create Vulkan descriptor set layout, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, + unsigned int set_layout_count, const VkDescriptorSetLayout *set_layouts, + unsigned int push_constant_count, const VkPushConstantRange *push_constants, + VkPipelineLayout *pipeline_layout) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct VkPipelineLayoutCreateInfo pipeline_layout_info; + VkResult vr; + + if (!vkd3d_validate_descriptor_set_count(device, set_layout_count)) + return E_INVALIDARG; + + pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_info.pNext = NULL; + pipeline_layout_info.flags = 0; + pipeline_layout_info.setLayoutCount = set_layout_count; + pipeline_layout_info.pSetLayouts = set_layouts; + pipeline_layout_info.pushConstantRangeCount = push_constant_count; + pipeline_layout_info.pPushConstantRanges = push_constants; + if ((vr = VK_CALL(vkCreatePipelineLayout(device->vk_device, + &pipeline_layout_info, NULL, pipeline_layout))) < 0) + { + WARN("Failed to create Vulkan pipeline layout, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, + 
VkDescriptorSetLayout *vk_set_layouts) +{ + unsigned int i; + + for (i = 0; i < root_signature->vk_set_count; ++i) + vk_set_layouts[i] = root_signature->descriptor_set_layouts[i].vk_layout; + + return i; +} + +static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signature, + struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc) +{ + VkDescriptorSetLayout vk_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; + const struct vkd3d_vulkan_info *vk_info = &device->vk_info; + struct vkd3d_descriptor_set_context context; + VkDescriptorSetLayoutBinding *binding_desc; + struct d3d12_root_signature_info info; + unsigned int i; + HRESULT hr; + + memset(&context, 0, sizeof(context)); + context.unbounded_offset = UINT_MAX; + binding_desc = NULL; + + root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; + root_signature->refcount = 1; + + root_signature->vk_pipeline_layout = VK_NULL_HANDLE; + root_signature->vk_set_count = 0; + root_signature->parameters = NULL; + root_signature->flags = desc->Flags; + root_signature->descriptor_mapping = NULL; + root_signature->descriptor_offsets = NULL; + root_signature->static_sampler_count = 0; + root_signature->static_samplers = NULL; + root_signature->device = device; + + if (desc->Flags & ~(D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT + | D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT)) + FIXME("Ignoring root signature flags %#x.\n", desc->Flags); + + if (FAILED(hr = d3d12_root_signature_info_from_desc(&info, desc, device->vk_info.EXT_descriptor_indexing))) + return hr; + if (info.cost > D3D12_MAX_ROOT_COST) + { + WARN("Root signature cost %zu exceeds maximum allowed cost.\n", info.cost); + return E_INVALIDARG; + } + + root_signature->binding_count = info.binding_count; + root_signature->static_sampler_count = desc->NumStaticSamplers; + root_signature->root_descriptor_count = info.root_descriptor_count; + root_signature->use_descriptor_arrays = 
device->vk_info.EXT_descriptor_indexing; + + hr = E_OUTOFMEMORY; + root_signature->parameter_count = desc->NumParameters; + if (!(root_signature->parameters = vkd3d_calloc(root_signature->parameter_count, + sizeof(*root_signature->parameters)))) + goto fail; + if (!(root_signature->descriptor_mapping = vkd3d_calloc(root_signature->binding_count, + sizeof(*root_signature->descriptor_mapping)))) + goto fail; + if (root_signature->use_descriptor_arrays && !(root_signature->descriptor_offsets = vkd3d_calloc( + root_signature->binding_count, sizeof(*root_signature->descriptor_offsets)))) + goto fail; + root_signature->root_constant_count = info.root_constant_count; + if (!(root_signature->root_constants = vkd3d_calloc(root_signature->root_constant_count, + sizeof(*root_signature->root_constants)))) + goto fail; + if (!(root_signature->static_samplers = vkd3d_calloc(root_signature->static_sampler_count, + sizeof(*root_signature->static_samplers)))) + goto fail; + + if (!(binding_desc = vkd3d_calloc(info.binding_count, sizeof(*binding_desc)))) + goto fail; + context.first_binding = binding_desc; + context.current_binding = binding_desc; + + if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) + goto fail; + + /* We use KHR_push_descriptor for root descriptor parameters. 
*/ + if (vk_info->KHR_push_descriptor) + { + if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, + &context, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR))) + goto fail; + } + + root_signature->main_set = root_signature->vk_set_count; + + if (FAILED(hr = d3d12_root_signature_init_push_constants(root_signature, desc, + root_signature->push_constant_ranges, &root_signature->push_constant_range_count))) + goto fail; + if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, device, desc, &context))) + goto fail; + if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context))) + goto fail; + + if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) + goto fail; + + vkd3d_free(binding_desc); + binding_desc = NULL; + + i = d3d12_root_signature_copy_descriptor_set_layouts(root_signature, vk_layouts); + if (FAILED(hr = vkd3d_create_pipeline_layout(device, i, + vk_layouts, root_signature->push_constant_range_count, + root_signature->push_constant_ranges, &root_signature->vk_pipeline_layout))) + goto fail; + + if (FAILED(hr = vkd3d_private_store_init(&root_signature->private_store))) + goto fail; + + d3d12_device_add_ref(device); + + return S_OK; + +fail: + vkd3d_free(binding_desc); + d3d12_root_signature_cleanup(root_signature, device); + return hr; +} + +HRESULT d3d12_root_signature_create(struct d3d12_device *device, + const void *bytecode, size_t bytecode_length, struct d3d12_root_signature **root_signature) +{ + const struct vkd3d_shader_code dxbc = {bytecode, bytecode_length}; + union + { + D3D12_VERSIONED_ROOT_SIGNATURE_DESC d3d12; + struct vkd3d_shader_versioned_root_signature_desc vkd3d; + } root_signature_desc; + struct d3d12_root_signature *object; + HRESULT hr; + int ret; + + if ((ret = vkd3d_parse_root_signature_v_1_0(&dxbc, &root_signature_desc.vkd3d)) < 0) + { + WARN("Failed to parse root signature, vkd3d result %d.\n", 
ret); + return hresult_from_vkd3d_result(ret); + } + + if (!(object = vkd3d_malloc(sizeof(*object)))) + { + vkd3d_shader_free_root_signature(&root_signature_desc.vkd3d); + return E_OUTOFMEMORY; + } + + hr = d3d12_root_signature_init(object, device, &root_signature_desc.d3d12.u.Desc_1_0); + vkd3d_shader_free_root_signature(&root_signature_desc.vkd3d); + if (FAILED(hr)) + { + vkd3d_free(object); + return hr; + } + + TRACE("Created root signature %p.\n", object); + + *root_signature = object; + + return S_OK; +} + +/* vkd3d_render_pass_cache */ +struct vkd3d_render_pass_entry +{ + struct vkd3d_render_pass_key key; + VkRenderPass vk_render_pass; +}; + +STATIC_ASSERT(sizeof(struct vkd3d_render_pass_key) == 48); + +static HRESULT vkd3d_render_pass_cache_create_pass_locked(struct vkd3d_render_pass_cache *cache, + struct d3d12_device *device, const struct vkd3d_render_pass_key *key, VkRenderPass *vk_render_pass) +{ + VkAttachmentReference attachment_references[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 1]; + VkAttachmentDescription attachments[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 1]; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_render_pass_entry *entry; + unsigned int index, attachment_index; + VkSubpassDescription sub_pass_desc; + VkRenderPassCreateInfo pass_info; + bool have_depth_stencil; + unsigned int rt_count; + VkResult vr; + + if (!vkd3d_array_reserve((void **)&cache->render_passes, &cache->render_passes_size, + cache->render_pass_count + 1, sizeof(*cache->render_passes))) + { + *vk_render_pass = VK_NULL_HANDLE; + return E_OUTOFMEMORY; + } + + entry = &cache->render_passes[cache->render_pass_count]; + + entry->key = *key; + + have_depth_stencil = key->depth_enable || key->stencil_enable; + rt_count = have_depth_stencil ? 
key->attachment_count - 1 : key->attachment_count; + assert(rt_count <= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); + + for (index = 0, attachment_index = 0; index < rt_count; ++index) + { + if (!key->vk_formats[index]) + { + attachment_references[index].attachment = VK_ATTACHMENT_UNUSED; + attachment_references[index].layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + continue; + } + + attachments[attachment_index].flags = 0; + attachments[attachment_index].format = key->vk_formats[index]; + attachments[attachment_index].samples = key->sample_count; + attachments[attachment_index].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[attachment_index].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[attachment_index].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[attachment_index].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[attachment_index].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachments[attachment_index].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + attachment_references[index].attachment = attachment_index; + attachment_references[index].layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + ++attachment_index; + } + + if (have_depth_stencil) + { + VkImageLayout depth_layout = key->depth_stencil_write + ? 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + + attachments[attachment_index].flags = 0; + attachments[attachment_index].format = key->vk_formats[index]; + attachments[attachment_index].samples = key->sample_count; + + if (key->depth_enable) + { + attachments[attachment_index].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[attachment_index].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + } + else + { + attachments[attachment_index].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[attachment_index].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + if (key->stencil_enable) + { + attachments[attachment_index].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[attachment_index].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + } + else + { + attachments[attachment_index].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[attachment_index].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + attachments[attachment_index].initialLayout = depth_layout; + attachments[attachment_index].finalLayout = depth_layout; + + attachment_references[index].attachment = attachment_index; + attachment_references[index].layout = depth_layout; + + attachment_index++; + } + + sub_pass_desc.flags = 0; + sub_pass_desc.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + sub_pass_desc.inputAttachmentCount = 0; + sub_pass_desc.pInputAttachments = NULL; + sub_pass_desc.colorAttachmentCount = rt_count; + sub_pass_desc.pColorAttachments = attachment_references; + sub_pass_desc.pResolveAttachments = NULL; + if (have_depth_stencil) + sub_pass_desc.pDepthStencilAttachment = &attachment_references[rt_count]; + else + sub_pass_desc.pDepthStencilAttachment = NULL; + sub_pass_desc.preserveAttachmentCount = 0; + sub_pass_desc.pPreserveAttachments = NULL; + + pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + pass_info.pNext = NULL; + pass_info.flags = 0; + pass_info.attachmentCount = attachment_index; + 
pass_info.pAttachments = attachments; + pass_info.subpassCount = 1; + pass_info.pSubpasses = &sub_pass_desc; + pass_info.dependencyCount = 0; + pass_info.pDependencies = NULL; + if ((vr = VK_CALL(vkCreateRenderPass(device->vk_device, &pass_info, NULL, vk_render_pass))) >= 0) + { + entry->vk_render_pass = *vk_render_pass; + ++cache->render_pass_count; + } + else + { + WARN("Failed to create Vulkan render pass, vr %d.\n", vr); + *vk_render_pass = VK_NULL_HANDLE; + } + + return hresult_from_vk_result(vr); +} + +HRESULT vkd3d_render_pass_cache_find(struct vkd3d_render_pass_cache *cache, + struct d3d12_device *device, const struct vkd3d_render_pass_key *key, VkRenderPass *vk_render_pass) +{ + bool found = false; + HRESULT hr = S_OK; + unsigned int i; + int rc; + + if ((rc = vkd3d_mutex_lock(&device->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + *vk_render_pass = VK_NULL_HANDLE; + return hresult_from_errno(rc); + } + + for (i = 0; i < cache->render_pass_count; ++i) + { + struct vkd3d_render_pass_entry *current = &cache->render_passes[i]; + + if (!memcmp(¤t->key, key, sizeof(*key))) + { + *vk_render_pass = current->vk_render_pass; + found = true; + break; + } + } + + if (!found) + hr = vkd3d_render_pass_cache_create_pass_locked(cache, device, key, vk_render_pass); + + vkd3d_mutex_unlock(&device->mutex); + + return hr; +} + +void vkd3d_render_pass_cache_init(struct vkd3d_render_pass_cache *cache) +{ + cache->render_passes = NULL; + cache->render_pass_count = 0; + cache->render_passes_size = 0; +} + +void vkd3d_render_pass_cache_cleanup(struct vkd3d_render_pass_cache *cache, + struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + unsigned int i; + + for (i = 0; i < cache->render_pass_count; ++i) + { + struct vkd3d_render_pass_entry *current = &cache->render_passes[i]; + VK_CALL(vkDestroyRenderPass(device->vk_device, current->vk_render_pass, NULL)); + } + + vkd3d_free(cache->render_passes); + 
cache->render_passes = NULL; +} + +struct vkd3d_pipeline_key +{ + D3D12_PRIMITIVE_TOPOLOGY topology; + uint32_t strides[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + VkFormat dsv_format; +}; + +struct vkd3d_compiled_pipeline +{ + struct list entry; + struct vkd3d_pipeline_key key; + VkPipeline vk_pipeline; + VkRenderPass vk_render_pass; +}; + +/* ID3D12PipelineState */ +static inline struct d3d12_pipeline_state *impl_from_ID3D12PipelineState(ID3D12PipelineState *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_pipeline_state, ID3D12PipelineState_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_pipeline_state_QueryInterface(ID3D12PipelineState *iface, + REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + if (IsEqualGUID(riid, &IID_ID3D12PipelineState) + || IsEqualGUID(riid, &IID_ID3D12Pageable) + || IsEqualGUID(riid, &IID_ID3D12DeviceChild) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { + ID3D12PipelineState_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_AddRef(ID3D12PipelineState *iface) +{ + struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(iface); + ULONG refcount = InterlockedIncrement(&state->refcount); + + TRACE("%p increasing refcount to %u.\n", state, refcount); + + return refcount; +} + +static void d3d12_pipeline_state_destroy_graphics(struct d3d12_pipeline_state *state, + struct d3d12_device *device) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->u.graphics; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_compiled_pipeline *current, *e; + unsigned int i; + + for (i = 0; i < graphics->stage_count; ++i) + { + VK_CALL(vkDestroyShaderModule(device->vk_device, graphics->stages[i].module, NULL)); + 
} + + LIST_FOR_EACH_ENTRY_SAFE(current, e, &graphics->compiled_pipelines, struct vkd3d_compiled_pipeline, entry) + { + VK_CALL(vkDestroyPipeline(device->vk_device, current->vk_pipeline, NULL)); + vkd3d_free(current); + } +} + +static void d3d12_pipeline_uav_counter_state_cleanup(struct d3d12_pipeline_uav_counter_state *uav_counters, + struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + if (uav_counters->vk_set_layout) + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, uav_counters->vk_set_layout, NULL)); + if (uav_counters->vk_pipeline_layout) + VK_CALL(vkDestroyPipelineLayout(device->vk_device, uav_counters->vk_pipeline_layout, NULL)); + + vkd3d_free(uav_counters->bindings); +} + +static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState *iface) +{ + struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(iface); + ULONG refcount = InterlockedDecrement(&state->refcount); + + TRACE("%p decreasing refcount to %u.\n", state, refcount); + + if (!refcount) + { + struct d3d12_device *device = state->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + vkd3d_private_store_destroy(&state->private_store); + + if (d3d12_pipeline_state_is_graphics(state)) + d3d12_pipeline_state_destroy_graphics(state, device); + else if (d3d12_pipeline_state_is_compute(state)) + VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL)); + + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + + vkd3d_free(state); + + d3d12_device_release(device); + } + + return refcount; +} + +static HRESULT STDMETHODCALLTYPE d3d12_pipeline_state_GetPrivateData(ID3D12PipelineState *iface, + REFGUID guid, UINT *data_size, void *data) +{ + struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return 
vkd3d_get_private_data(&state->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_pipeline_state_SetPrivateData(ID3D12PipelineState *iface, + REFGUID guid, UINT data_size, const void *data) +{ + struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&state->private_store, guid, data_size, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_pipeline_state_SetPrivateDataInterface(ID3D12PipelineState *iface, + REFGUID guid, const IUnknown *data) +{ + struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&state->private_store, guid, data); +} + +static HRESULT STDMETHODCALLTYPE d3d12_pipeline_state_SetName(ID3D12PipelineState *iface, const WCHAR *name) +{ + struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, state->device->wchar_size)); + + if (d3d12_pipeline_state_is_compute(state)) + { + return vkd3d_set_vk_object_name(state->device, (uint64_t)state->u.compute.vk_pipeline, + VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT, name); + } + + return name ? 
S_OK : E_INVALIDARG; +} + +static HRESULT STDMETHODCALLTYPE d3d12_pipeline_state_GetDevice(ID3D12PipelineState *iface, + REFIID iid, void **device) +{ + struct d3d12_pipeline_state *state = impl_from_ID3D12PipelineState(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(state->device, iid, device); +} + +static HRESULT STDMETHODCALLTYPE d3d12_pipeline_state_GetCachedBlob(ID3D12PipelineState *iface, + ID3DBlob **blob) +{ + FIXME("iface %p, blob %p stub!\n", iface, blob); + + return E_NOTIMPL; +} + +static const struct ID3D12PipelineStateVtbl d3d12_pipeline_state_vtbl = +{ + /* IUnknown methods */ + d3d12_pipeline_state_QueryInterface, + d3d12_pipeline_state_AddRef, + d3d12_pipeline_state_Release, + /* ID3D12Object methods */ + d3d12_pipeline_state_GetPrivateData, + d3d12_pipeline_state_SetPrivateData, + d3d12_pipeline_state_SetPrivateDataInterface, + d3d12_pipeline_state_SetName, + /* ID3D12DeviceChild methods */ + d3d12_pipeline_state_GetDevice, + /* ID3D12PipelineState methods */ + d3d12_pipeline_state_GetCachedBlob, +}; + +struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12PipelineState *iface) +{ + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_pipeline_state_vtbl); + return impl_from_ID3D12PipelineState(iface); +} + +static HRESULT create_shader_stage(struct d3d12_device *device, + struct VkPipelineShaderStageCreateInfo *stage_desc, enum VkShaderStageFlagBits stage, + const D3D12_SHADER_BYTECODE *code, const struct vkd3d_shader_interface_info *shader_interface) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_shader_compile_info compile_info; + struct VkShaderModuleCreateInfo shader_desc; + struct vkd3d_shader_code spirv = {0}; + VkResult vr; + int ret; + + static const struct vkd3d_shader_compile_option options[] = + { + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_3}, + }; + + 
stage_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage_desc->pNext = NULL; + stage_desc->flags = 0; + stage_desc->stage = stage; + stage_desc->pName = "main"; + stage_desc->pSpecializationInfo = NULL; + + shader_desc.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + shader_desc.pNext = NULL; + shader_desc.flags = 0; + + compile_info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; + compile_info.next = shader_interface; + compile_info.source.code = code->pShaderBytecode; + compile_info.source.size = code->BytecodeLength; + compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; + compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; + compile_info.options = options; + compile_info.option_count = ARRAY_SIZE(options); + compile_info.log_level = VKD3D_SHADER_LOG_NONE; + compile_info.source_name = NULL; + + if ((ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) + { + WARN("Failed to compile shader, vkd3d result %d.\n", ret); + return hresult_from_vkd3d_result(ret); + } + shader_desc.codeSize = spirv.size; + shader_desc.pCode = spirv.code; + + vr = VK_CALL(vkCreateShaderModule(device->vk_device, &shader_desc, NULL, &stage_desc->module)); + vkd3d_shader_free_shader_code(&spirv); + if (vr < 0) + { + WARN("Failed to create Vulkan shader module, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static int vkd3d_scan_dxbc(const D3D12_SHADER_BYTECODE *code, + struct vkd3d_shader_scan_descriptor_info *descriptor_info) +{ + struct vkd3d_shader_compile_info compile_info; + + static const struct vkd3d_shader_compile_option options[] = + { + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_3}, + }; + + compile_info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; + compile_info.next = descriptor_info; + compile_info.source.code = code->pShaderBytecode; + compile_info.source.size = code->BytecodeLength; + compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; + 
compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; + compile_info.options = options; + compile_info.option_count = ARRAY_SIZE(options); + compile_info.log_level = VKD3D_SHADER_LOG_NONE; + compile_info.source_name = NULL; + + return vkd3d_shader_scan(&compile_info, NULL); +} + +static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device, + const D3D12_SHADER_BYTECODE *code, const struct vkd3d_shader_interface_info *shader_interface, + VkPipelineLayout vk_pipeline_layout, VkPipeline *vk_pipeline) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkComputePipelineCreateInfo pipeline_info; + VkResult vr; + HRESULT hr; + + pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipeline_info.pNext = NULL; + pipeline_info.flags = 0; + if (FAILED(hr = create_shader_stage(device, &pipeline_info.stage, + VK_SHADER_STAGE_COMPUTE_BIT, code, shader_interface))) + return hr; + pipeline_info.layout = vk_pipeline_layout; + pipeline_info.basePipelineHandle = VK_NULL_HANDLE; + pipeline_info.basePipelineIndex = -1; + + vr = VK_CALL(vkCreateComputePipelines(device->vk_device, + VK_NULL_HANDLE, 1, &pipeline_info, NULL, vk_pipeline)); + VK_CALL(vkDestroyShaderModule(device->vk_device, pipeline_info.stage.module, NULL)); + if (vr < 0) + { + WARN("Failed to create Vulkan compute pipeline, hr %#x.", hr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static HRESULT d3d12_pipeline_state_init_uav_counters(struct d3d12_pipeline_state *state, + struct d3d12_device *device, const struct d3d12_root_signature *root_signature, + const struct vkd3d_shader_scan_descriptor_info *shader_info, VkShaderStageFlags stage_flags) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetLayout set_layouts[VKD3D_MAX_DESCRIPTOR_SETS + 1]; + VkDescriptorSetLayoutBinding *binding_desc; + uint32_t set_index, descriptor_binding; + unsigned int uav_counter_count = 0; + unsigned int i, j; + HRESULT hr; + + 
assert(vkd3d_popcount(stage_flags) == 1); + + for (i = 0; i < shader_info->descriptor_count; ++i) + { + const struct vkd3d_shader_descriptor_info *d = &shader_info->descriptors[i]; + + if (d->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV + && (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER)) + ++uav_counter_count; + } + + if (!uav_counter_count) + return S_OK; + + /* It should be possible to support other stages in Vulkan, but in a graphics pipeline + * D3D12 currently only supports counters in pixel shaders, and handling multiple stages + * would be more complex. */ + if (!(stage_flags & (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT))) + { + FIXME("Found a UAV counter for Vulkan shader stage %#x. UAV counters in a " + "graphics pipeline are only supported in pixel shaders.\n", stage_flags); + return E_INVALIDARG; + } + + if (!(binding_desc = vkd3d_calloc(uav_counter_count, sizeof(*binding_desc)))) + return E_OUTOFMEMORY; + if (!(state->uav_counters.bindings = vkd3d_calloc(uav_counter_count, sizeof(*state->uav_counters.bindings)))) + { + vkd3d_free(binding_desc); + return E_OUTOFMEMORY; + } + state->uav_counters.binding_count = uav_counter_count; + + descriptor_binding = 0; + set_index = d3d12_root_signature_copy_descriptor_set_layouts(root_signature, set_layouts); + + for (i = 0, j = 0; i < shader_info->descriptor_count; ++i) + { + const struct vkd3d_shader_descriptor_info *d = &shader_info->descriptors[i]; + + if (d->type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV + || !(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER)) + continue; + + state->uav_counters.bindings[j].register_space = d->register_space; + state->uav_counters.bindings[j].register_index = d->register_index; + state->uav_counters.bindings[j].shader_visibility = (stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) + ? 
VKD3D_SHADER_VISIBILITY_COMPUTE : VKD3D_SHADER_VISIBILITY_PIXEL; + state->uav_counters.bindings[j].binding.set = set_index; + state->uav_counters.bindings[j].binding.binding = descriptor_binding; + state->uav_counters.bindings[j].binding.count = 1; + + binding_desc[j].binding = descriptor_binding; + binding_desc[j].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + binding_desc[j].descriptorCount = 1; + binding_desc[j].stageFlags = stage_flags; + binding_desc[j].pImmutableSamplers = NULL; + + ++descriptor_binding; + ++j; + } + + /* Create a descriptor set layout for UAV counters. */ + hr = vkd3d_create_descriptor_set_layout(device, 0, descriptor_binding, + false, binding_desc, &state->uav_counters.vk_set_layout); + vkd3d_free(binding_desc); + if (FAILED(hr)) + { + vkd3d_free(state->uav_counters.bindings); + return hr; + } + + /* Create a pipeline layout which is compatible for all other descriptor + * sets with the root signature's pipeline layout. + */ + state->uav_counters.set_index = set_index; + set_layouts[set_index++] = state->uav_counters.vk_set_layout; + if (FAILED(hr = vkd3d_create_pipeline_layout(device, set_index, set_layouts, + root_signature->push_constant_range_count, root_signature->push_constant_ranges, + &state->uav_counters.vk_pipeline_layout))) + { + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, state->uav_counters.vk_set_layout, NULL)); + vkd3d_free(state->uav_counters.bindings); + return hr; + } + + return S_OK; +} + +static HRESULT d3d12_pipeline_state_find_and_init_uav_counters(struct d3d12_pipeline_state *state, + struct d3d12_device *device, const struct d3d12_root_signature *root_signature, + const D3D12_SHADER_BYTECODE *code, VkShaderStageFlags stage_flags) +{ + struct vkd3d_shader_scan_descriptor_info shader_info; + HRESULT hr; + int ret; + + shader_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; + shader_info.next = NULL; + if ((ret = vkd3d_scan_dxbc(code, &shader_info)) < 0) + { + WARN("Failed to scan 
shader bytecode, stage %#x, vkd3d result %d.\n", stage_flags, ret); + return hresult_from_vkd3d_result(ret); + } + + if (FAILED(hr = d3d12_pipeline_state_init_uav_counters(state, device, root_signature, &shader_info, stage_flags))) + WARN("Failed to create descriptor set layout for UAV counters, hr %#x.\n", hr); + + vkd3d_shader_free_scan_descriptor_info(&shader_info); + + return hr; +} + +static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *state, + struct d3d12_device *device, const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_shader_interface_info shader_interface; + struct vkd3d_shader_descriptor_offset_info offset_info; + const struct d3d12_root_signature *root_signature; + struct vkd3d_shader_spirv_target_info target_info; + VkPipelineLayout vk_pipeline_layout; + HRESULT hr; + + state->ID3D12PipelineState_iface.lpVtbl = &d3d12_pipeline_state_vtbl; + state->refcount = 1; + + memset(&state->uav_counters, 0, sizeof(state->uav_counters)); + + if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->pRootSignature))) + { + WARN("Root signature is NULL.\n"); + return E_INVALIDARG; + } + + if (FAILED(hr = d3d12_pipeline_state_find_and_init_uav_counters(state, device, root_signature, + &desc->CS, VK_SHADER_STAGE_COMPUTE_BIT))) + return hr; + + memset(&target_info, 0, sizeof(target_info)); + target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; + target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; + target_info.extensions = device->vk_info.shader_extensions; + target_info.extension_count = device->vk_info.shader_extension_count; + + if (root_signature->descriptor_offsets) + { + offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; + offset_info.next = NULL; + offset_info.descriptor_table_offset = 0; + offset_info.descriptor_table_count = 0; + offset_info.binding_offsets = root_signature->descriptor_offsets; + 
offset_info.uav_counter_offsets = NULL; + vkd3d_prepend_struct(&target_info, &offset_info); + } + + shader_interface.type = VKD3D_SHADER_STRUCTURE_TYPE_INTERFACE_INFO; + shader_interface.next = &target_info; + shader_interface.bindings = root_signature->descriptor_mapping; + shader_interface.binding_count = root_signature->binding_count; + shader_interface.push_constant_buffers = root_signature->root_constants; + shader_interface.push_constant_buffer_count = root_signature->root_constant_count; + shader_interface.combined_samplers = NULL; + shader_interface.combined_sampler_count = 0; + shader_interface.uav_counters = state->uav_counters.bindings; + shader_interface.uav_counter_count = state->uav_counters.binding_count; + + vk_pipeline_layout = state->uav_counters.vk_pipeline_layout + ? state->uav_counters.vk_pipeline_layout : root_signature->vk_pipeline_layout; + if (FAILED(hr = vkd3d_create_compute_pipeline(device, &desc->CS, &shader_interface, + vk_pipeline_layout, &state->u.compute.vk_pipeline))) + { + WARN("Failed to create Vulkan compute pipeline, hr %#x.\n", hr); + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + return hr; + } + + if (FAILED(hr = vkd3d_private_store_init(&state->private_store))) + { + VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL)); + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + return hr; + } + + state->vk_bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + d3d12_device_add_ref(state->device = device); + + return S_OK; +} + +HRESULT d3d12_pipeline_state_create_compute(struct d3d12_device *device, + const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, struct d3d12_pipeline_state **state) +{ + struct d3d12_pipeline_state *object; + HRESULT hr; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_pipeline_state_init_compute(object, device, desc))) + { + vkd3d_free(object); + return hr; + } + + TRACE("Created compute 
pipeline state %p.\n", object); + + *state = object; + + return S_OK; +} + +static enum VkPolygonMode vk_polygon_mode_from_d3d12(D3D12_FILL_MODE mode) +{ + switch (mode) + { + case D3D12_FILL_MODE_WIREFRAME: + return VK_POLYGON_MODE_LINE; + case D3D12_FILL_MODE_SOLID: + return VK_POLYGON_MODE_FILL; + default: + FIXME("Unhandled fill mode %#x.\n", mode); + return VK_POLYGON_MODE_FILL; + } +} + +static enum VkCullModeFlagBits vk_cull_mode_from_d3d12(D3D12_CULL_MODE mode) +{ + switch (mode) + { + case D3D12_CULL_MODE_NONE: + return VK_CULL_MODE_NONE; + case D3D12_CULL_MODE_FRONT: + return VK_CULL_MODE_FRONT_BIT; + case D3D12_CULL_MODE_BACK: + return VK_CULL_MODE_BACK_BIT; + default: + FIXME("Unhandled cull mode %#x.\n", mode); + return VK_CULL_MODE_NONE; + } +} + +static void rs_desc_from_d3d12(VkPipelineRasterizationStateCreateInfo *vk_desc, + const D3D12_RASTERIZER_DESC *d3d12_desc) +{ + vk_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + vk_desc->pNext = NULL; + vk_desc->flags = 0; + vk_desc->depthClampEnable = !d3d12_desc->DepthClipEnable; + vk_desc->rasterizerDiscardEnable = VK_FALSE; + vk_desc->polygonMode = vk_polygon_mode_from_d3d12(d3d12_desc->FillMode); + vk_desc->cullMode = vk_cull_mode_from_d3d12(d3d12_desc->CullMode); + vk_desc->frontFace = d3d12_desc->FrontCounterClockwise ? 
VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE; + vk_desc->depthBiasEnable = d3d12_desc->DepthBias || d3d12_desc->SlopeScaledDepthBias; + vk_desc->depthBiasConstantFactor = d3d12_desc->DepthBias; + vk_desc->depthBiasClamp = d3d12_desc->DepthBiasClamp; + vk_desc->depthBiasSlopeFactor = d3d12_desc->SlopeScaledDepthBias; + vk_desc->lineWidth = 1.0f; + + if (d3d12_desc->MultisampleEnable) + FIXME_ONCE("Ignoring MultisampleEnable %#x.\n", d3d12_desc->MultisampleEnable); + if (d3d12_desc->AntialiasedLineEnable) + FIXME_ONCE("Ignoring AntialiasedLineEnable %#x.\n", d3d12_desc->AntialiasedLineEnable); + if (d3d12_desc->ForcedSampleCount) + FIXME("Ignoring ForcedSampleCount %#x.\n", d3d12_desc->ForcedSampleCount); + if (d3d12_desc->ConservativeRaster) + FIXME("Ignoring ConservativeRaster %#x.\n", d3d12_desc->ConservativeRaster); +} + +static void rs_depth_clip_info_from_d3d12(VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_info, + VkPipelineRasterizationStateCreateInfo *vk_rs_desc, const D3D12_RASTERIZER_DESC *d3d12_desc) +{ + vk_rs_desc->depthClampEnable = VK_TRUE; + + depth_clip_info->sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT; + depth_clip_info->pNext = NULL; + depth_clip_info->flags = 0; + depth_clip_info->depthClipEnable = d3d12_desc->DepthClipEnable; + + vk_prepend_struct(vk_rs_desc, depth_clip_info); +} + +static void rs_stream_info_from_d3d12(VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info, + VkPipelineRasterizationStateCreateInfo *vk_rs_desc, const D3D12_STREAM_OUTPUT_DESC *so_desc, + const struct vkd3d_vulkan_info *vk_info) +{ + if (!so_desc->NumEntries || !so_desc->RasterizedStream + || so_desc->RasterizedStream == D3D12_SO_NO_RASTERIZED_STREAM) + return; + + if (!vk_info->rasterization_stream) + { + FIXME("Rasterization stream select is not supported by Vulkan implementation.\n"); + return; + } + + stream_info->sType = 
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT; + stream_info->pNext = NULL; + stream_info->flags = 0; + stream_info->rasterizationStream = so_desc->RasterizedStream; + + vk_prepend_struct(vk_rs_desc, stream_info); +} + +static enum VkStencilOp vk_stencil_op_from_d3d12(D3D12_STENCIL_OP op) +{ + switch (op) + { + case D3D12_STENCIL_OP_KEEP: + return VK_STENCIL_OP_KEEP; + case D3D12_STENCIL_OP_ZERO: + return VK_STENCIL_OP_ZERO; + case D3D12_STENCIL_OP_REPLACE: + return VK_STENCIL_OP_REPLACE; + case D3D12_STENCIL_OP_INCR_SAT: + return VK_STENCIL_OP_INCREMENT_AND_CLAMP; + case D3D12_STENCIL_OP_DECR_SAT: + return VK_STENCIL_OP_DECREMENT_AND_CLAMP; + case D3D12_STENCIL_OP_INVERT: + return VK_STENCIL_OP_INVERT; + case D3D12_STENCIL_OP_INCR: + return VK_STENCIL_OP_INCREMENT_AND_WRAP; + case D3D12_STENCIL_OP_DECR: + return VK_STENCIL_OP_DECREMENT_AND_WRAP; + default: + FIXME("Unhandled stencil op %#x.\n", op); + return VK_STENCIL_OP_KEEP; + } +} + +enum VkCompareOp vk_compare_op_from_d3d12(D3D12_COMPARISON_FUNC op) +{ + switch (op) + { + case D3D12_COMPARISON_FUNC_NEVER: + return VK_COMPARE_OP_NEVER; + case D3D12_COMPARISON_FUNC_LESS: + return VK_COMPARE_OP_LESS; + case D3D12_COMPARISON_FUNC_EQUAL: + return VK_COMPARE_OP_EQUAL; + case D3D12_COMPARISON_FUNC_LESS_EQUAL: + return VK_COMPARE_OP_LESS_OR_EQUAL; + case D3D12_COMPARISON_FUNC_GREATER: + return VK_COMPARE_OP_GREATER; + case D3D12_COMPARISON_FUNC_NOT_EQUAL: + return VK_COMPARE_OP_NOT_EQUAL; + case D3D12_COMPARISON_FUNC_GREATER_EQUAL: + return VK_COMPARE_OP_GREATER_OR_EQUAL; + case D3D12_COMPARISON_FUNC_ALWAYS: + return VK_COMPARE_OP_ALWAYS; + default: + FIXME("Unhandled compare op %#x.\n", op); + return VK_COMPARE_OP_NEVER; + } +} + +static void vk_stencil_op_state_from_d3d12(struct VkStencilOpState *vk_desc, + const D3D12_DEPTH_STENCILOP_DESC *d3d12_desc, uint32_t compare_mask, uint32_t write_mask) +{ + vk_desc->failOp = vk_stencil_op_from_d3d12(d3d12_desc->StencilFailOp); + vk_desc->passOp = 
vk_stencil_op_from_d3d12(d3d12_desc->StencilPassOp); + vk_desc->depthFailOp = vk_stencil_op_from_d3d12(d3d12_desc->StencilDepthFailOp); + vk_desc->compareOp = vk_compare_op_from_d3d12(d3d12_desc->StencilFunc); + vk_desc->compareMask = compare_mask; + vk_desc->writeMask = write_mask; + /* The stencil reference value is a dynamic state. Set by OMSetStencilRef(). */ + vk_desc->reference = 0; +} + +static void ds_desc_from_d3d12(struct VkPipelineDepthStencilStateCreateInfo *vk_desc, + const D3D12_DEPTH_STENCIL_DESC *d3d12_desc) +{ + memset(vk_desc, 0, sizeof(*vk_desc)); + vk_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + vk_desc->pNext = NULL; + vk_desc->flags = 0; + if ((vk_desc->depthTestEnable = d3d12_desc->DepthEnable)) + { + vk_desc->depthWriteEnable = d3d12_desc->DepthWriteMask & D3D12_DEPTH_WRITE_MASK_ALL; + vk_desc->depthCompareOp = vk_compare_op_from_d3d12(d3d12_desc->DepthFunc); + } + else + { + vk_desc->depthWriteEnable = VK_FALSE; + vk_desc->depthCompareOp = VK_COMPARE_OP_NEVER; + } + vk_desc->depthBoundsTestEnable = VK_FALSE; + if ((vk_desc->stencilTestEnable = d3d12_desc->StencilEnable)) + { + vk_stencil_op_state_from_d3d12(&vk_desc->front, &d3d12_desc->FrontFace, + d3d12_desc->StencilReadMask, d3d12_desc->StencilWriteMask); + vk_stencil_op_state_from_d3d12(&vk_desc->back, &d3d12_desc->BackFace, + d3d12_desc->StencilReadMask, d3d12_desc->StencilWriteMask); + } + else + { + memset(&vk_desc->front, 0, sizeof(vk_desc->front)); + memset(&vk_desc->back, 0, sizeof(vk_desc->back)); + } + vk_desc->minDepthBounds = 0.0f; + vk_desc->maxDepthBounds = 1.0f; +} + +static enum VkBlendFactor vk_blend_factor_from_d3d12(D3D12_BLEND blend, bool alpha) +{ + switch (blend) + { + case D3D12_BLEND_ZERO: + return VK_BLEND_FACTOR_ZERO; + case D3D12_BLEND_ONE: + return VK_BLEND_FACTOR_ONE; + case D3D12_BLEND_SRC_COLOR: + return VK_BLEND_FACTOR_SRC_COLOR; + case D3D12_BLEND_INV_SRC_COLOR: + return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + case 
D3D12_BLEND_SRC_ALPHA: + return VK_BLEND_FACTOR_SRC_ALPHA; + case D3D12_BLEND_INV_SRC_ALPHA: + return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case D3D12_BLEND_DEST_ALPHA: + return VK_BLEND_FACTOR_DST_ALPHA; + case D3D12_BLEND_INV_DEST_ALPHA: + return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; + case D3D12_BLEND_DEST_COLOR: + return VK_BLEND_FACTOR_DST_COLOR; + case D3D12_BLEND_INV_DEST_COLOR: + return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; + case D3D12_BLEND_SRC_ALPHA_SAT: + return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; + case D3D12_BLEND_BLEND_FACTOR: + if (alpha) + return VK_BLEND_FACTOR_CONSTANT_ALPHA; + return VK_BLEND_FACTOR_CONSTANT_COLOR; + case D3D12_BLEND_INV_BLEND_FACTOR: + if (alpha) + return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; + return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; + case D3D12_BLEND_SRC1_COLOR: + return VK_BLEND_FACTOR_SRC1_COLOR; + case D3D12_BLEND_INV_SRC1_COLOR: + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR; + case D3D12_BLEND_SRC1_ALPHA: + return VK_BLEND_FACTOR_SRC1_ALPHA; + case D3D12_BLEND_INV_SRC1_ALPHA: + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; + default: + FIXME("Unhandled blend %#x.\n", blend); + return VK_BLEND_FACTOR_ZERO; + } +} + +static enum VkBlendOp vk_blend_op_from_d3d12(D3D12_BLEND_OP op) +{ + switch (op) + { + case D3D12_BLEND_OP_ADD: + return VK_BLEND_OP_ADD; + case D3D12_BLEND_OP_SUBTRACT: + return VK_BLEND_OP_SUBTRACT; + case D3D12_BLEND_OP_REV_SUBTRACT: + return VK_BLEND_OP_REVERSE_SUBTRACT; + case D3D12_BLEND_OP_MIN: + return VK_BLEND_OP_MIN; + case D3D12_BLEND_OP_MAX: + return VK_BLEND_OP_MAX; + default: + FIXME("Unhandled blend op %#x.\n", op); + return VK_BLEND_OP_ADD; + } +} + +static void blend_attachment_from_d3d12(struct VkPipelineColorBlendAttachmentState *vk_desc, + const D3D12_RENDER_TARGET_BLEND_DESC *d3d12_desc) +{ + if (d3d12_desc->BlendEnable) + { + vk_desc->blendEnable = VK_TRUE; + vk_desc->srcColorBlendFactor = vk_blend_factor_from_d3d12(d3d12_desc->SrcBlend, false); + vk_desc->dstColorBlendFactor = 
vk_blend_factor_from_d3d12(d3d12_desc->DestBlend, false); + vk_desc->colorBlendOp = vk_blend_op_from_d3d12(d3d12_desc->BlendOp); + vk_desc->srcAlphaBlendFactor = vk_blend_factor_from_d3d12(d3d12_desc->SrcBlendAlpha, true); + vk_desc->dstAlphaBlendFactor = vk_blend_factor_from_d3d12(d3d12_desc->DestBlendAlpha, true); + vk_desc->alphaBlendOp = vk_blend_op_from_d3d12(d3d12_desc->BlendOpAlpha); + } + else + { + memset(vk_desc, 0, sizeof(*vk_desc)); + } + vk_desc->colorWriteMask = 0; + if (d3d12_desc->RenderTargetWriteMask & D3D12_COLOR_WRITE_ENABLE_RED) + vk_desc->colorWriteMask |= VK_COLOR_COMPONENT_R_BIT; + if (d3d12_desc->RenderTargetWriteMask & D3D12_COLOR_WRITE_ENABLE_GREEN) + vk_desc->colorWriteMask |= VK_COLOR_COMPONENT_G_BIT; + if (d3d12_desc->RenderTargetWriteMask & D3D12_COLOR_WRITE_ENABLE_BLUE) + vk_desc->colorWriteMask |= VK_COLOR_COMPONENT_B_BIT; + if (d3d12_desc->RenderTargetWriteMask & D3D12_COLOR_WRITE_ENABLE_ALPHA) + vk_desc->colorWriteMask |= VK_COLOR_COMPONENT_A_BIT; +} + +static bool is_dual_source_blending_blend(D3D12_BLEND b) +{ + return b == D3D12_BLEND_SRC1_COLOR || b == D3D12_BLEND_INV_SRC1_COLOR + || b == D3D12_BLEND_SRC1_ALPHA || b == D3D12_BLEND_INV_SRC1_ALPHA; +} + +static bool is_dual_source_blending(const D3D12_RENDER_TARGET_BLEND_DESC *desc) +{ + return desc->BlendEnable + && (is_dual_source_blending_blend(desc->SrcBlend) + || is_dual_source_blending_blend(desc->DestBlend) + || is_dual_source_blending_blend(desc->SrcBlendAlpha) + || is_dual_source_blending_blend(desc->DestBlendAlpha)); +} + +static HRESULT compute_input_layout_offsets(const struct d3d12_device *device, + const D3D12_INPUT_LAYOUT_DESC *input_layout_desc, uint32_t *offsets) +{ + uint32_t input_slot_offsets[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT] = {0}; + const D3D12_INPUT_ELEMENT_DESC *e; + const struct vkd3d_format *format; + unsigned int i; + + if (input_layout_desc->NumElements > D3D12_VS_INPUT_REGISTER_COUNT) + { + FIXME("InputLayout.NumElements %u > %u, ignoring 
extra elements.\n", + input_layout_desc->NumElements, D3D12_VS_INPUT_REGISTER_COUNT); + } + + for (i = 0; i < min(input_layout_desc->NumElements, D3D12_VS_INPUT_REGISTER_COUNT); ++i) + { + e = &input_layout_desc->pInputElementDescs[i]; + + if (e->InputSlot >= ARRAY_SIZE(input_slot_offsets)) + { + WARN("Invalid input slot %#x.\n", e->InputSlot); + return E_INVALIDARG; + } + + /* TODO: DXGI_FORMAT_UNKNOWN will return a format with byte_count == 1, + * which may not match driver behaviour (return E_INVALIDARG?). */ + if (!(format = vkd3d_get_format(device, e->Format, false))) + { + WARN("Invalid input element format %#x.\n", e->Format); + return E_INVALIDARG; + } + + if (e->AlignedByteOffset != D3D12_APPEND_ALIGNED_ELEMENT) + offsets[i] = e->AlignedByteOffset; + else + offsets[i] = input_slot_offsets[e->InputSlot]; + + input_slot_offsets[e->InputSlot] = align(offsets[i] + format->byte_count, 4); + } + + return S_OK; +} + +static unsigned int vkd3d_get_rt_format_swizzle(const struct vkd3d_format *format) +{ + if (format->dxgi_format == DXGI_FORMAT_A8_UNORM) + return VKD3D_SHADER_SWIZZLE(W, X, Y, Z); + + return VKD3D_SHADER_NO_SWIZZLE; +} + +STATIC_ASSERT(sizeof(struct vkd3d_shader_transform_feedback_element) == sizeof(D3D12_SO_DECLARATION_ENTRY)); + +static HRESULT d3d12_graphics_pipeline_state_create_render_pass( + struct d3d12_graphics_pipeline_state *graphics, struct d3d12_device *device, + VkFormat dynamic_dsv_format, VkRenderPass *vk_render_pass) +{ + struct vkd3d_render_pass_key key; + VkFormat dsv_format; + unsigned int i; + + memcpy(key.vk_formats, graphics->rtv_formats, sizeof(graphics->rtv_formats)); + key.attachment_count = graphics->rt_count; + + if (!(dsv_format = graphics->dsv_format) && (graphics->null_attachment_mask & dsv_attachment_mask(graphics))) + dsv_format = dynamic_dsv_format; + + if (dsv_format) + { + assert(graphics->ds_desc.front.writeMask == graphics->ds_desc.back.writeMask); + key.depth_enable = graphics->ds_desc.depthTestEnable; + 
key.stencil_enable = graphics->ds_desc.stencilTestEnable; + key.depth_stencil_write = graphics->ds_desc.depthWriteEnable + || graphics->ds_desc.front.writeMask; + key.vk_formats[key.attachment_count++] = dsv_format; + } + else + { + key.depth_enable = false; + key.stencil_enable = false; + key.depth_stencil_write = false; + } + + if (key.attachment_count != ARRAY_SIZE(key.vk_formats)) + key.vk_formats[ARRAY_SIZE(key.vk_formats) - 1] = VK_FORMAT_UNDEFINED; + for (i = key.attachment_count; i < ARRAY_SIZE(key.vk_formats); ++i) + assert(key.vk_formats[i] == VK_FORMAT_UNDEFINED); + + key.padding = 0; + key.sample_count = graphics->ms_desc.rasterizationSamples; + + return vkd3d_render_pass_cache_find(&device->render_pass_cache, device, &key, vk_render_pass); +} + +static VkLogicOp vk_logic_op_from_d3d12(D3D12_LOGIC_OP op) +{ + switch (op) + { + case D3D12_LOGIC_OP_CLEAR: + return VK_LOGIC_OP_CLEAR; + case D3D12_LOGIC_OP_SET: + return VK_LOGIC_OP_SET; + case D3D12_LOGIC_OP_COPY: + return VK_LOGIC_OP_COPY; + case D3D12_LOGIC_OP_COPY_INVERTED: + return VK_LOGIC_OP_COPY_INVERTED; + case D3D12_LOGIC_OP_NOOP: + return VK_LOGIC_OP_NO_OP; + case D3D12_LOGIC_OP_INVERT: + return VK_LOGIC_OP_INVERT; + case D3D12_LOGIC_OP_AND: + return VK_LOGIC_OP_AND; + case D3D12_LOGIC_OP_NAND: + return VK_LOGIC_OP_NAND; + case D3D12_LOGIC_OP_OR: + return VK_LOGIC_OP_OR; + case D3D12_LOGIC_OP_NOR: + return VK_LOGIC_OP_NOR; + case D3D12_LOGIC_OP_XOR: + return VK_LOGIC_OP_XOR; + case D3D12_LOGIC_OP_EQUIV: + return VK_LOGIC_OP_EQUIVALENT; + case D3D12_LOGIC_OP_AND_REVERSE: + return VK_LOGIC_OP_AND_REVERSE; + case D3D12_LOGIC_OP_AND_INVERTED: + return VK_LOGIC_OP_AND_INVERTED; + case D3D12_LOGIC_OP_OR_REVERSE: + return VK_LOGIC_OP_OR_REVERSE; + case D3D12_LOGIC_OP_OR_INVERTED: + return VK_LOGIC_OP_OR_INVERTED; + default: + FIXME("Unhandled logic op %#x.\n", op); + return VK_LOGIC_OP_NO_OP; + } +} + +static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *state, + struct 
d3d12_device *device, const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc) +{ + unsigned int ps_output_swizzle[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; + struct d3d12_graphics_pipeline_state *graphics = &state->u.graphics; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + const D3D12_STREAM_OUTPUT_DESC *so_desc = &desc->StreamOutput; + VkVertexInputBindingDivisorDescriptionEXT *binding_divisor; + const struct vkd3d_vulkan_info *vk_info = &device->vk_info; + uint32_t instance_divisors[D3D12_VS_INPUT_REGISTER_COUNT]; + struct vkd3d_shader_spirv_target_info *stage_target_info; + uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT]; + struct vkd3d_shader_descriptor_offset_info offset_info; + struct vkd3d_shader_parameter ps_shader_parameters[1]; + struct vkd3d_shader_transform_feedback_info xfb_info; + struct vkd3d_shader_spirv_target_info ps_target_info; + struct vkd3d_shader_interface_info shader_interface; + struct vkd3d_shader_spirv_target_info target_info; + const struct d3d12_root_signature *root_signature; + struct vkd3d_shader_signature input_signature; + bool have_attachment, is_dsv_format_unknown; + VkShaderStageFlagBits xfb_stage = 0; + VkSampleCountFlagBits sample_count; + const struct vkd3d_format *format; + unsigned int instance_divisor; + VkVertexInputRate input_rate; + unsigned int i, j; + size_t rt_count; + uint32_t mask; + HRESULT hr; + int ret; + + static const DWORD default_ps_code[] = + { +#if 0 + ps_4_0 + ret +#endif + 0x43425844, 0x19cbf606, 0x18f562b9, 0xdaeed4db, 0xc324aa46, 0x00000001, 0x00000060, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x52444853, 0x0000000c, 0x00000040, 0x00000003, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE default_ps = {default_ps_code, sizeof(default_ps_code)}; + static const struct + { + enum VkShaderStageFlagBits stage; + ptrdiff_t offset; + } + shader_stages[] = + { + 
{VK_SHADER_STAGE_VERTEX_BIT, offsetof(D3D12_GRAPHICS_PIPELINE_STATE_DESC, VS)}, + {VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, offsetof(D3D12_GRAPHICS_PIPELINE_STATE_DESC, HS)}, + {VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, offsetof(D3D12_GRAPHICS_PIPELINE_STATE_DESC, DS)}, + {VK_SHADER_STAGE_GEOMETRY_BIT, offsetof(D3D12_GRAPHICS_PIPELINE_STATE_DESC, GS)}, + {VK_SHADER_STAGE_FRAGMENT_BIT, offsetof(D3D12_GRAPHICS_PIPELINE_STATE_DESC, PS)}, + }; + + state->ID3D12PipelineState_iface.lpVtbl = &d3d12_pipeline_state_vtbl; + state->refcount = 1; + + memset(&state->uav_counters, 0, sizeof(state->uav_counters)); + graphics->stage_count = 0; + + memset(&input_signature, 0, sizeof(input_signature)); + + for (i = desc->NumRenderTargets; i < ARRAY_SIZE(desc->RTVFormats); ++i) + { + if (desc->RTVFormats[i] != DXGI_FORMAT_UNKNOWN) + { + WARN("Format must be set to DXGI_FORMAT_UNKNOWN for inactive render targets.\n"); + return E_INVALIDARG; + } + } + + if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->pRootSignature))) + { + WARN("Root signature is NULL.\n"); + return E_INVALIDARG; + } + + sample_count = vk_samples_from_dxgi_sample_desc(&desc->SampleDesc); + if (desc->SampleDesc.Count != 1 && desc->SampleDesc.Quality) + WARN("Ignoring sample quality %u.\n", desc->SampleDesc.Quality); + + rt_count = desc->NumRenderTargets; + if (rt_count > ARRAY_SIZE(graphics->blend_attachments)) + { + FIXME("NumRenderTargets %zu > %zu, ignoring extra formats.\n", + rt_count, ARRAY_SIZE(graphics->blend_attachments)); + rt_count = ARRAY_SIZE(graphics->blend_attachments); + } + + graphics->om_logic_op_enable = desc->BlendState.RenderTarget[0].LogicOpEnable + && device->feature_options.OutputMergerLogicOp; + graphics->om_logic_op = graphics->om_logic_op_enable + ? 
vk_logic_op_from_d3d12(desc->BlendState.RenderTarget[0].LogicOp) + : VK_LOGIC_OP_COPY; + if (desc->BlendState.RenderTarget[0].LogicOpEnable && !graphics->om_logic_op_enable) + WARN("The device does not support output merger logic ops. Ignoring logic op %#x.\n", + desc->BlendState.RenderTarget[0].LogicOp); + + graphics->null_attachment_mask = 0; + for (i = 0; i < rt_count; ++i) + { + const D3D12_RENDER_TARGET_BLEND_DESC *rt_desc; + + if (desc->RTVFormats[i] == DXGI_FORMAT_UNKNOWN) + { + graphics->null_attachment_mask |= 1u << i; + ps_output_swizzle[i] = VKD3D_SHADER_NO_SWIZZLE; + graphics->rtv_formats[i] = VK_FORMAT_UNDEFINED; + } + else if ((format = vkd3d_get_format(device, desc->RTVFormats[i], false))) + { + ps_output_swizzle[i] = vkd3d_get_rt_format_swizzle(format); + graphics->rtv_formats[i] = format->vk_format; + } + else + { + WARN("Invalid RTV format %#x.\n", desc->RTVFormats[i]); + hr = E_INVALIDARG; + goto fail; + } + + rt_desc = &desc->BlendState.RenderTarget[desc->BlendState.IndependentBlendEnable ? 
i : 0]; + if (desc->BlendState.IndependentBlendEnable && rt_desc->LogicOpEnable) + { + WARN("IndependentBlendEnable must be FALSE when logic operations are enabled.\n"); + hr = E_INVALIDARG; + goto fail; + } + if (rt_desc->BlendEnable && rt_desc->LogicOpEnable) + { + WARN("Only one of BlendEnable or LogicOpEnable can be set to TRUE."); + hr = E_INVALIDARG; + goto fail; + } + + blend_attachment_from_d3d12(&graphics->blend_attachments[i], rt_desc); + } + for (i = rt_count; i < ARRAY_SIZE(graphics->rtv_formats); ++i) + graphics->rtv_formats[i] = VK_FORMAT_UNDEFINED; + graphics->rt_count = rt_count; + + ds_desc_from_d3d12(&graphics->ds_desc, &desc->DepthStencilState); + if (desc->DSVFormat == DXGI_FORMAT_UNKNOWN + && graphics->ds_desc.depthTestEnable && !graphics->ds_desc.depthWriteEnable + && graphics->ds_desc.depthCompareOp == VK_COMPARE_OP_ALWAYS && !graphics->ds_desc.stencilTestEnable) + { + TRACE("Disabling depth test.\n"); + graphics->ds_desc.depthTestEnable = VK_FALSE; + } + + graphics->dsv_format = VK_FORMAT_UNDEFINED; + if (graphics->ds_desc.depthTestEnable || graphics->ds_desc.stencilTestEnable) + { + if (desc->DSVFormat == DXGI_FORMAT_UNKNOWN) + { + WARN("DSV format is DXGI_FORMAT_UNKNOWN.\n"); + graphics->dsv_format = VK_FORMAT_UNDEFINED; + graphics->null_attachment_mask |= dsv_attachment_mask(graphics); + } + else if ((format = vkd3d_get_format(device, desc->DSVFormat, true))) + { + if (!(format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) + FIXME("Format %#x is not depth/stencil format.\n", format->dxgi_format); + + graphics->dsv_format = format->vk_format; + } + else + { + WARN("Invalid DSV format %#x.\n", desc->DSVFormat); + hr = E_INVALIDARG; + goto fail; + } + + if (!desc->PS.pShaderBytecode) + { + if (FAILED(hr = create_shader_stage(device, &graphics->stages[graphics->stage_count], + VK_SHADER_STAGE_FRAGMENT_BIT, &default_ps, NULL))) + goto fail; + + ++graphics->stage_count; + } + } + + ps_shader_parameters[0].name = 
VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT; + ps_shader_parameters[0].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT; + ps_shader_parameters[0].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32; + ps_shader_parameters[0].u.immediate_constant.u.u32 = sample_count; + + ps_target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; + ps_target_info.next = NULL; + ps_target_info.entry_point = "main"; + ps_target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; + ps_target_info.extensions = vk_info->shader_extensions; + ps_target_info.extension_count = vk_info->shader_extension_count; + ps_target_info.parameters = ps_shader_parameters; + ps_target_info.parameter_count = ARRAY_SIZE(ps_shader_parameters); + ps_target_info.dual_source_blending = is_dual_source_blending(&desc->BlendState.RenderTarget[0]); + ps_target_info.output_swizzles = ps_output_swizzle; + ps_target_info.output_swizzle_count = rt_count; + + if (ps_target_info.dual_source_blending && rt_count > 1) + { + WARN("Only one render target is allowed when dual source blending is used.\n"); + hr = E_INVALIDARG; + goto fail; + } + if (ps_target_info.dual_source_blending && desc->BlendState.IndependentBlendEnable) + { + for (i = 1; i < ARRAY_SIZE(desc->BlendState.RenderTarget); ++i) + { + if (desc->BlendState.RenderTarget[i].BlendEnable) + { + WARN("Blend enable cannot be set for render target %u when dual source blending is used.\n", i); + hr = E_INVALIDARG; + goto fail; + } + } + } + + memset(&target_info, 0, sizeof(target_info)); + target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; + target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; + target_info.extensions = vk_info->shader_extensions; + target_info.extension_count = vk_info->shader_extension_count; + + graphics->xfb_enabled = false; + if (so_desc->NumEntries) + { + if (!(root_signature->flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT)) + { + WARN("Stream output is used without 
D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT.\n"); + hr = E_INVALIDARG; + goto fail; + } + + if (!vk_info->EXT_transform_feedback) + { + FIXME("Transform feedback is not supported by Vulkan implementation.\n"); + hr = E_NOTIMPL; + goto fail; + } + + graphics->xfb_enabled = true; + + xfb_info.type = VKD3D_SHADER_STRUCTURE_TYPE_TRANSFORM_FEEDBACK_INFO; + xfb_info.next = NULL; + + xfb_info.elements = (const struct vkd3d_shader_transform_feedback_element *)so_desc->pSODeclaration; + xfb_info.element_count = so_desc->NumEntries; + xfb_info.buffer_strides = so_desc->pBufferStrides; + xfb_info.buffer_stride_count = so_desc->NumStrides; + + if (desc->GS.pShaderBytecode) + xfb_stage = VK_SHADER_STAGE_GEOMETRY_BIT; + else if (desc->DS.pShaderBytecode) + xfb_stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + else + xfb_stage = VK_SHADER_STAGE_VERTEX_BIT; + } + + shader_interface.type = VKD3D_SHADER_STRUCTURE_TYPE_INTERFACE_INFO; + shader_interface.next = NULL; + shader_interface.bindings = root_signature->descriptor_mapping; + shader_interface.binding_count = root_signature->binding_count; + shader_interface.push_constant_buffers = root_signature->root_constants; + shader_interface.push_constant_buffer_count = root_signature->root_constant_count; + shader_interface.combined_samplers = NULL; + shader_interface.combined_sampler_count = 0; + + if (root_signature->descriptor_offsets) + { + offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; + offset_info.next = NULL; + offset_info.descriptor_table_offset = 0; + offset_info.descriptor_table_count = 0; + offset_info.binding_offsets = root_signature->descriptor_offsets; + offset_info.uav_counter_offsets = NULL; + } + + for (i = 0; i < ARRAY_SIZE(shader_stages); ++i) + { + const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset); + const struct vkd3d_shader_code dxbc = {b->pShaderBytecode, b->BytecodeLength}; + + if (!b->pShaderBytecode) + continue; + + if (FAILED(hr = 
d3d12_pipeline_state_find_and_init_uav_counters(state, device, root_signature, + b, shader_stages[i].stage))) + goto fail; + + shader_interface.uav_counters = NULL; + shader_interface.uav_counter_count = 0; + stage_target_info = &target_info; + switch (shader_stages[i].stage) + { + case VK_SHADER_STAGE_VERTEX_BIT: + if ((ret = vkd3d_shader_parse_input_signature(&dxbc, &input_signature, NULL)) < 0) + { + hr = hresult_from_vkd3d_result(ret); + goto fail; + } + break; + + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: + if (desc->PrimitiveTopologyType != D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH) + { + WARN("D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH must be used with tessellation shaders.\n"); + hr = E_INVALIDARG; + goto fail; + } + break; + + case VK_SHADER_STAGE_GEOMETRY_BIT: + break; + + case VK_SHADER_STAGE_FRAGMENT_BIT: + shader_interface.uav_counters = state->uav_counters.bindings; + shader_interface.uav_counter_count = state->uav_counters.binding_count; + stage_target_info = &ps_target_info; + break; + + default: + hr = E_INVALIDARG; + goto fail; + } + + shader_interface.next = NULL; + xfb_info.next = NULL; + ps_target_info.next = NULL; + target_info.next = NULL; + offset_info.next = NULL; + if (shader_stages[i].stage == xfb_stage) + vkd3d_prepend_struct(&shader_interface, &xfb_info); + vkd3d_prepend_struct(&shader_interface, stage_target_info); + if (root_signature->descriptor_offsets) + vkd3d_prepend_struct(&shader_interface, &offset_info); + + if (FAILED(hr = create_shader_stage(device, &graphics->stages[graphics->stage_count], + shader_stages[i].stage, b, &shader_interface))) + goto fail; + + ++graphics->stage_count; + } + + graphics->attribute_count = desc->InputLayout.NumElements; + if (graphics->attribute_count > ARRAY_SIZE(graphics->attributes)) + { + FIXME("InputLayout.NumElements %zu > %zu, ignoring extra elements.\n", + graphics->attribute_count, ARRAY_SIZE(graphics->attributes)); + graphics->attribute_count = 
ARRAY_SIZE(graphics->attributes); + } + + if (graphics->attribute_count + && !(root_signature->flags & D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)) + { + WARN("Input layout is used without D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT.\n"); + hr = E_INVALIDARG; + goto fail; + } + + if (FAILED(hr = compute_input_layout_offsets(device, &desc->InputLayout, aligned_offsets))) + goto fail; + + graphics->instance_divisor_count = 0; + for (i = 0, j = 0, mask = 0; i < graphics->attribute_count; ++i) + { + const D3D12_INPUT_ELEMENT_DESC *e = &desc->InputLayout.pInputElementDescs[i]; + const struct vkd3d_shader_signature_element *signature_element; + + /* TODO: DXGI_FORMAT_UNKNOWN will succeed here, which may not match + * driver behaviour (return E_INVALIDARG?). */ + if (!(format = vkd3d_get_format(device, e->Format, false))) + { + WARN("Invalid input element format %#x.\n", e->Format); + hr = E_INVALIDARG; + goto fail; + } + + if (e->InputSlot >= ARRAY_SIZE(graphics->input_rates) + || e->InputSlot >= ARRAY_SIZE(instance_divisors)) + { + WARN("Invalid input slot %#x.\n", e->InputSlot); + hr = E_INVALIDARG; + goto fail; + } + + if (!(signature_element = vkd3d_shader_find_signature_element(&input_signature, + e->SemanticName, e->SemanticIndex, 0))) + { + WARN("Unused input element %u.\n", i); + continue; + } + + graphics->attributes[j].location = signature_element->register_index; + graphics->attributes[j].binding = e->InputSlot; + graphics->attributes[j].format = format->vk_format; + if (e->AlignedByteOffset != D3D12_APPEND_ALIGNED_ELEMENT) + graphics->attributes[j].offset = e->AlignedByteOffset; + else + graphics->attributes[j].offset = aligned_offsets[i]; + ++j; + + switch (e->InputSlotClass) + { + case D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA: + input_rate = VK_VERTEX_INPUT_RATE_VERTEX; + instance_divisor = 1; + break; + + case D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA: + input_rate = VK_VERTEX_INPUT_RATE_INSTANCE; + instance_divisor 
= e->InstanceDataStepRate; + if (instance_divisor > vk_info->max_vertex_attrib_divisor + || (!instance_divisor && !vk_info->vertex_attrib_zero_divisor)) + { + FIXME("Instance divisor %u not supported by Vulkan implementation.\n", instance_divisor); + instance_divisor = 1; + } + break; + + default: + FIXME("Unhandled input slot class %#x on input element %u.\n", e->InputSlotClass, i); + hr = E_INVALIDARG; + goto fail; + } + + if (mask & (1u << e->InputSlot) && (graphics->input_rates[e->InputSlot] != input_rate + || instance_divisors[e->InputSlot] != instance_divisor)) + { + FIXME("Input slot rate %#x, instance divisor %u on input element %u conflicts " + "with earlier input slot rate %#x, instance divisor %u.\n", + input_rate, instance_divisor, e->InputSlot, + graphics->input_rates[e->InputSlot], instance_divisors[e->InputSlot]); + hr = E_INVALIDARG; + goto fail; + } + + graphics->input_rates[e->InputSlot] = input_rate; + instance_divisors[e->InputSlot] = instance_divisor; + if (instance_divisor != 1 && !(mask & (1u << e->InputSlot))) + { + binding_divisor = &graphics->instance_divisors[graphics->instance_divisor_count++]; + binding_divisor->binding = e->InputSlot; + binding_divisor->divisor = instance_divisor; + } + mask |= 1u << e->InputSlot; + } + graphics->attribute_count = j; + vkd3d_shader_free_shader_signature(&input_signature); + + switch (desc->IBStripCutValue) + { + case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED: + case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF: + case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF: + graphics->index_buffer_strip_cut_value = desc->IBStripCutValue; + break; + default: + WARN("Invalid index buffer strip cut value %#x.\n", desc->IBStripCutValue); + hr = E_INVALIDARG; + goto fail; + } + + is_dsv_format_unknown = graphics->null_attachment_mask & dsv_attachment_mask(graphics); + + rs_desc_from_d3d12(&graphics->rs_desc, &desc->RasterizerState); + have_attachment = graphics->rt_count || graphics->dsv_format || 
is_dsv_format_unknown; + if ((!have_attachment && !(desc->PS.pShaderBytecode && desc->PS.BytecodeLength)) + || (graphics->xfb_enabled && so_desc->RasterizedStream == D3D12_SO_NO_RASTERIZED_STREAM)) + graphics->rs_desc.rasterizerDiscardEnable = VK_TRUE; + + rs_stream_info_from_d3d12(&graphics->rs_stream_info, &graphics->rs_desc, so_desc, vk_info); + if (vk_info->EXT_depth_clip_enable) + rs_depth_clip_info_from_d3d12(&graphics->rs_depth_clip_info, &graphics->rs_desc, &desc->RasterizerState); + + graphics->ms_desc.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + graphics->ms_desc.pNext = NULL; + graphics->ms_desc.flags = 0; + graphics->ms_desc.rasterizationSamples = sample_count; + graphics->ms_desc.sampleShadingEnable = VK_FALSE; + graphics->ms_desc.minSampleShading = 0.0f; + graphics->ms_desc.pSampleMask = NULL; + if (desc->SampleMask != ~0u) + { + assert(DIV_ROUND_UP(sample_count, 32) <= ARRAY_SIZE(graphics->sample_mask)); + graphics->sample_mask[0] = desc->SampleMask; + graphics->sample_mask[1] = 0xffffffffu; + graphics->ms_desc.pSampleMask = graphics->sample_mask; + } + graphics->ms_desc.alphaToCoverageEnable = desc->BlendState.AlphaToCoverageEnable; + graphics->ms_desc.alphaToOneEnable = VK_FALSE; + + /* We defer creating the render pass for pipelines wth DSVFormat equal to + * DXGI_FORMAT_UNKNOWN. We take the actual DSV format from the bound DSV. 
*/ + if (is_dsv_format_unknown) + graphics->render_pass = VK_NULL_HANDLE; + else if (FAILED(hr = d3d12_graphics_pipeline_state_create_render_pass(graphics, + device, 0, &graphics->render_pass))) + goto fail; + + graphics->root_signature = root_signature; + + list_init(&graphics->compiled_pipelines); + + if (FAILED(hr = vkd3d_private_store_init(&state->private_store))) + goto fail; + + state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; + d3d12_device_add_ref(state->device = device); + + return S_OK; + +fail: + for (i = 0; i < graphics->stage_count; ++i) + { + VK_CALL(vkDestroyShaderModule(device->vk_device, state->u.graphics.stages[i].module, NULL)); + } + vkd3d_shader_free_shader_signature(&input_signature); + + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + + return hr; +} + +HRESULT d3d12_pipeline_state_create_graphics(struct d3d12_device *device, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, struct d3d12_pipeline_state **state) +{ + struct d3d12_pipeline_state *object; + HRESULT hr; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_pipeline_state_init_graphics(object, device, desc))) + { + vkd3d_free(object); + return hr; + } + + TRACE("Created graphics pipeline state %p.\n", object); + + *state = object; + + return S_OK; +} + +static enum VkPrimitiveTopology vk_topology_from_d3d12_topology(D3D12_PRIMITIVE_TOPOLOGY topology) +{ + switch (topology) + { + case D3D_PRIMITIVE_TOPOLOGY_POINTLIST: + return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + case D3D_PRIMITIVE_TOPOLOGY_LINELIST: + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + case D3D_PRIMITIVE_TOPOLOGY_LINESTRIP: + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + case D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + case D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + case D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST: + case 
D3D_PRIMITIVE_TOPOLOGY_2_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_5_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_6_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_7_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_8_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_9_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_10_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_11_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_12_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_13_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_14_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_15_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_16_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_17_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_18_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_19_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_20_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_21_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_22_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_23_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_24_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_25_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_26_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_27_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_28_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_29_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_30_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_31_CONTROL_POINT_PATCHLIST: + case D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST: + return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + case D3D_PRIMITIVE_TOPOLOGY_UNDEFINED: + return VK_PRIMITIVE_TOPOLOGY_MAX_ENUM; + default: + FIXME("Unhandled primitive topology %#x.\n", topology); + return 
VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } +} + +static bool vk_topology_can_restart(VkPrimitiveTopology topology) +{ + switch (topology) + { + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: + return false; + + default: + return true; + } +} + +static VkPipeline d3d12_pipeline_state_find_compiled_pipeline(const struct d3d12_pipeline_state *state, + const struct vkd3d_pipeline_key *key, VkRenderPass *vk_render_pass) +{ + const struct d3d12_graphics_pipeline_state *graphics = &state->u.graphics; + struct d3d12_device *device = state->device; + VkPipeline vk_pipeline = VK_NULL_HANDLE; + struct vkd3d_compiled_pipeline *current; + int rc; + + *vk_render_pass = VK_NULL_HANDLE; + + if (!(rc = vkd3d_mutex_lock(&device->mutex))) + { + LIST_FOR_EACH_ENTRY(current, &graphics->compiled_pipelines, struct vkd3d_compiled_pipeline, entry) + { + if (!memcmp(&current->key, key, sizeof(*key))) + { + vk_pipeline = current->vk_pipeline; + *vk_render_pass = current->vk_render_pass; + break; + } + } + vkd3d_mutex_unlock(&device->mutex); + } + else + { + ERR("Failed to lock mutex, error %d.\n", rc); + } + + return vk_pipeline; +} + +static bool d3d12_pipeline_state_put_pipeline_to_cache(struct d3d12_pipeline_state *state, + const struct vkd3d_pipeline_key *key, VkPipeline vk_pipeline, VkRenderPass vk_render_pass) +{ + struct d3d12_graphics_pipeline_state *graphics = &state->u.graphics; + struct vkd3d_compiled_pipeline *compiled_pipeline, *current; + struct d3d12_device *device = state->device; + int rc; + + if (!(compiled_pipeline = vkd3d_malloc(sizeof(*compiled_pipeline)))) + return false; + + compiled_pipeline->key = *key; + compiled_pipeline->vk_pipeline = vk_pipeline; + compiled_pipeline->vk_render_pass = vk_render_pass; + + if ((rc = vkd3d_mutex_lock(&device->mutex)))
+ { + ERR("Failed to lock mutex, error %d.\n", rc); + vkd3d_free(compiled_pipeline); + return false; + } + + LIST_FOR_EACH_ENTRY(current, &graphics->compiled_pipelines, struct vkd3d_compiled_pipeline, entry) + { + if (!memcmp(&current->key, key, sizeof(*key))) + { + vkd3d_free(compiled_pipeline); + compiled_pipeline = NULL; + break; + } + } + + if (compiled_pipeline) + list_add_tail(&graphics->compiled_pipelines, &compiled_pipeline->entry); + + vkd3d_mutex_unlock(&device->mutex); + return compiled_pipeline; +} + +VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_state *state, + D3D12_PRIMITIVE_TOPOLOGY topology, const uint32_t *strides, VkFormat dsv_format, + VkRenderPass *vk_render_pass) +{ + VkVertexInputBindingDescription bindings[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + const struct vkd3d_vk_device_procs *vk_procs = &state->device->vk_procs; + struct d3d12_graphics_pipeline_state *graphics = &state->u.graphics; + VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_info; + VkPipelineTessellationStateCreateInfo tessellation_info; + VkPipelineVertexInputStateCreateInfo input_desc; + VkPipelineInputAssemblyStateCreateInfo ia_desc; + VkPipelineColorBlendStateCreateInfo blend_desc; + struct d3d12_device *device = state->device; + VkGraphicsPipelineCreateInfo pipeline_desc; + struct vkd3d_pipeline_key pipeline_key; + size_t binding_count = 0; + VkPipeline vk_pipeline; + unsigned int i; + uint32_t mask; + VkResult vr; + HRESULT hr; + + static const VkPipelineViewportStateCreateInfo vp_desc = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .viewportCount = 1, + .pViewports = NULL, + .scissorCount = 1, + .pScissors = NULL, + }; + static const VkDynamicState dynamic_states[] = + { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + static const VkPipelineDynamicStateCreateInfo dynamic_desc = + {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .dynamicStateCount = ARRAY_SIZE(dynamic_states), + .pDynamicStates = dynamic_states, + }; + + assert(d3d12_pipeline_state_is_graphics(state)); + + memset(&pipeline_key, 0, sizeof(pipeline_key)); + pipeline_key.topology = topology; + + for (i = 0, mask = 0; i < graphics->attribute_count; ++i) + { + struct VkVertexInputBindingDescription *b; + uint32_t binding; + + binding = graphics->attributes[i].binding; + if (mask & (1u << binding)) + continue; + + if (binding_count == ARRAY_SIZE(bindings)) + { + FIXME("Maximum binding count exceeded.\n"); + break; + } + + mask |= 1u << binding; + b = &bindings[binding_count]; + b->binding = binding; + b->stride = strides[binding]; + b->inputRate = graphics->input_rates[binding]; + + pipeline_key.strides[binding_count] = strides[binding]; + + ++binding_count; + } + + pipeline_key.dsv_format = dsv_format; + + if ((vk_pipeline = d3d12_pipeline_state_find_compiled_pipeline(state, &pipeline_key, vk_render_pass))) + return vk_pipeline; + + input_desc.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + input_desc.pNext = NULL; + input_desc.flags = 0; + input_desc.vertexBindingDescriptionCount = binding_count; + input_desc.pVertexBindingDescriptions = bindings; + input_desc.vertexAttributeDescriptionCount = graphics->attribute_count; + input_desc.pVertexAttributeDescriptions = graphics->attributes; + + if (graphics->instance_divisor_count) + { + input_desc.pNext = &input_divisor_info; + input_divisor_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; + input_divisor_info.pNext = NULL; + input_divisor_info.vertexBindingDivisorCount = graphics->instance_divisor_count; + input_divisor_info.pVertexBindingDivisors = graphics->instance_divisors; + } + + ia_desc.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia_desc.pNext = NULL; + ia_desc.flags = 0; + ia_desc.topology = 
vk_topology_from_d3d12_topology(topology); + ia_desc.primitiveRestartEnable = graphics->index_buffer_strip_cut_value + && vk_topology_can_restart(ia_desc.topology); + + if (ia_desc.topology == VK_PRIMITIVE_TOPOLOGY_MAX_ENUM) + { + WARN("Primitive topology is undefined.\n"); + return VK_NULL_HANDLE; + } + + tessellation_info.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; + tessellation_info.pNext = NULL; + tessellation_info.flags = 0; + tessellation_info.patchControlPoints + = max(topology - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1, 1); + + blend_desc.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + blend_desc.pNext = NULL; + blend_desc.flags = 0; + blend_desc.logicOpEnable = graphics->om_logic_op_enable; + blend_desc.logicOp = graphics->om_logic_op; + blend_desc.attachmentCount = graphics->rt_count; + blend_desc.pAttachments = graphics->blend_attachments; + blend_desc.blendConstants[0] = D3D12_DEFAULT_BLEND_FACTOR_RED; + blend_desc.blendConstants[1] = D3D12_DEFAULT_BLEND_FACTOR_GREEN; + blend_desc.blendConstants[2] = D3D12_DEFAULT_BLEND_FACTOR_BLUE; + blend_desc.blendConstants[3] = D3D12_DEFAULT_BLEND_FACTOR_ALPHA; + + pipeline_desc.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_desc.pNext = NULL; + pipeline_desc.flags = 0; + pipeline_desc.stageCount = graphics->stage_count; + pipeline_desc.pStages = graphics->stages; + pipeline_desc.pVertexInputState = &input_desc; + pipeline_desc.pInputAssemblyState = &ia_desc; + pipeline_desc.pTessellationState = &tessellation_info; + pipeline_desc.pViewportState = &vp_desc; + pipeline_desc.pRasterizationState = &graphics->rs_desc; + pipeline_desc.pMultisampleState = &graphics->ms_desc; + pipeline_desc.pDepthStencilState = &graphics->ds_desc; + pipeline_desc.pColorBlendState = &blend_desc; + pipeline_desc.pDynamicState = &dynamic_desc; + pipeline_desc.layout = state->uav_counters.vk_pipeline_layout ? 
state->uav_counters.vk_pipeline_layout + : graphics->root_signature->vk_pipeline_layout; + pipeline_desc.subpass = 0; + pipeline_desc.basePipelineHandle = VK_NULL_HANDLE; + pipeline_desc.basePipelineIndex = -1; + + /* Create a render pass for pipelines with DXGI_FORMAT_UNKNOWN. */ + if (!(pipeline_desc.renderPass = graphics->render_pass)) + { + if (graphics->null_attachment_mask & dsv_attachment_mask(graphics)) + TRACE("Compiling %p with DSV format %#x.\n", state, dsv_format); + + if (FAILED(hr = d3d12_graphics_pipeline_state_create_render_pass(graphics, device, dsv_format, + &pipeline_desc.renderPass))) + return VK_NULL_HANDLE; + } + + *vk_render_pass = pipeline_desc.renderPass; + + if ((vr = VK_CALL(vkCreateGraphicsPipelines(device->vk_device, device->vk_pipeline_cache, + 1, &pipeline_desc, NULL, &vk_pipeline))) < 0) + { + WARN("Failed to create Vulkan graphics pipeline, vr %d.\n", vr); + return VK_NULL_HANDLE; + } + + if (d3d12_pipeline_state_put_pipeline_to_cache(state, &pipeline_key, vk_pipeline, pipeline_desc.renderPass)) + return vk_pipeline; + + /* Other thread compiled the pipeline before us. 
*/ + VK_CALL(vkDestroyPipeline(device->vk_device, vk_pipeline, NULL)); + vk_pipeline = d3d12_pipeline_state_find_compiled_pipeline(state, &pipeline_key, vk_render_pass); + if (!vk_pipeline) + ERR("Could not get the pipeline compiled by other thread from the cache.\n"); + return vk_pipeline; +} + +static void vkd3d_uav_clear_pipelines_cleanup(struct vkd3d_uav_clear_pipelines *pipelines, + struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_3d, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_2d_array, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_2d, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_1d_array, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_1d, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->buffer, NULL)); +} + +void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + vkd3d_uav_clear_pipelines_cleanup(&state->pipelines_uint, device); + vkd3d_uav_clear_pipelines_cleanup(&state->pipelines_float, device); + + VK_CALL(vkDestroyPipelineLayout(device->vk_device, state->vk_pipeline_layout_image, NULL)); + VK_CALL(vkDestroyPipelineLayout(device->vk_device, state->vk_pipeline_layout_buffer, NULL)); + + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, state->vk_set_layout_image, NULL)); + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, state->vk_set_layout_buffer, NULL)); +} + +HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device) +{ + struct vkd3d_shader_push_constant_buffer push_constant; + struct vkd3d_shader_interface_info shader_interface; + struct vkd3d_shader_resource_binding binding; + VkDescriptorSetLayoutBinding set_binding; + VkPushConstantRange 
push_constant_range; + unsigned int i; + HRESULT hr; + + const struct + { + VkDescriptorSetLayout *set_layout; + VkPipelineLayout *pipeline_layout; + VkDescriptorType descriptor_type; + } + set_layouts[] = + { + {&state->vk_set_layout_buffer, &state->vk_pipeline_layout_buffer, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER}, + {&state->vk_set_layout_image, &state->vk_pipeline_layout_image, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE}, + }; + + const struct + { + VkPipeline *pipeline; + VkPipelineLayout *pipeline_layout; + D3D12_SHADER_BYTECODE code; + } + pipelines[] = + { +#define SHADER_CODE(name) {name, sizeof(name)} + {&state->pipelines_float.buffer, &state->vk_pipeline_layout_buffer, + SHADER_CODE(cs_uav_clear_buffer_float_code)}, + {&state->pipelines_float.image_1d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_1d_float_code)}, + {&state->pipelines_float.image_1d_array, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_1d_array_float_code)}, + {&state->pipelines_float.image_2d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_2d_float_code)}, + {&state->pipelines_float.image_2d_array, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_2d_array_float_code)}, + {&state->pipelines_float.image_3d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_3d_float_code)}, + + {&state->pipelines_uint.buffer, &state->vk_pipeline_layout_buffer, + SHADER_CODE(cs_uav_clear_buffer_uint_code)}, + {&state->pipelines_uint.image_1d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_1d_uint_code)}, + {&state->pipelines_uint.image_1d_array, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_1d_array_uint_code)}, + {&state->pipelines_uint.image_2d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_2d_uint_code)}, + {&state->pipelines_uint.image_2d_array, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_2d_array_uint_code)}, + {&state->pipelines_uint.image_3d, &state->vk_pipeline_layout_image, + 
SHADER_CODE(cs_uav_clear_3d_uint_code)}, +#undef SHADER_CODE + }; + + memset(state, 0, sizeof(*state)); + + set_binding.binding = 0; + set_binding.descriptorCount = 1; + set_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + set_binding.pImmutableSamplers = NULL; + + binding.type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + binding.register_space = 0; + binding.register_index = 0; + binding.shader_visibility = VKD3D_SHADER_VISIBILITY_COMPUTE; + binding.binding.set = 0; + binding.binding.binding = 0; + binding.binding.count = 1; + + push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + push_constant_range.offset = 0; + push_constant_range.size = sizeof(struct vkd3d_uav_clear_args); + + push_constant.register_space = 0; + push_constant.register_index = 0; + push_constant.shader_visibility = VKD3D_SHADER_VISIBILITY_COMPUTE; + push_constant.offset = 0; + push_constant.size = sizeof(struct vkd3d_uav_clear_args); + + for (i = 0; i < ARRAY_SIZE(set_layouts); ++i) + { + set_binding.descriptorType = set_layouts[i].descriptor_type; + + if (FAILED(hr = vkd3d_create_descriptor_set_layout(device, 0, + 1, false, &set_binding, set_layouts[i].set_layout))) + { + ERR("Failed to create descriptor set layout %u, hr %#x.\n", i, hr); + goto fail; + } + + if (FAILED(hr = vkd3d_create_pipeline_layout(device, 1, set_layouts[i].set_layout, + 1, &push_constant_range, set_layouts[i].pipeline_layout))) + { + ERR("Failed to create pipeline layout %u, hr %#x.\n", i, hr); + goto fail; + } + } + + shader_interface.type = VKD3D_SHADER_STRUCTURE_TYPE_INTERFACE_INFO; + shader_interface.next = NULL; + shader_interface.bindings = &binding; + shader_interface.binding_count = 1; + shader_interface.push_constant_buffers = &push_constant; + shader_interface.push_constant_buffer_count = 1; + shader_interface.combined_samplers = NULL; + shader_interface.combined_sampler_count = 0; + shader_interface.uav_counters = NULL; + shader_interface.uav_counter_count = 0; + + for (i = 0; i < ARRAY_SIZE(pipelines); 
++i) + { + if (pipelines[i].pipeline_layout == &state->vk_pipeline_layout_buffer) + binding.flags = VKD3D_SHADER_BINDING_FLAG_BUFFER; + else + binding.flags = VKD3D_SHADER_BINDING_FLAG_IMAGE; + + if (FAILED(hr = vkd3d_create_compute_pipeline(device, &pipelines[i].code, &shader_interface, + *pipelines[i].pipeline_layout, pipelines[i].pipeline))) + { + ERR("Failed to create compute pipeline %u, hr %#x.\n", i, hr); + goto fail; + } + } + + return S_OK; + +fail: + vkd3d_uav_clear_state_cleanup(state, device); + return hr; +} diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c new file mode 100644 index 00000000000..5f67c8b95aa --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/utils.c @@ -0,0 +1,1063 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_private.h" + +#include + +#define COLOR (VK_IMAGE_ASPECT_COLOR_BIT) +#define DEPTH (VK_IMAGE_ASPECT_DEPTH_BIT) +#define STENCIL (VK_IMAGE_ASPECT_STENCIL_BIT) +#define DEPTH_STENCIL (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) +#define TYPELESS VKD3D_FORMAT_TYPE_TYPELESS +#define SINT VKD3D_FORMAT_TYPE_SINT +#define UINT VKD3D_FORMAT_TYPE_UINT +static const struct vkd3d_format vkd3d_formats[] = +{ + {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1}, + {DXGI_FORMAT_R32G32B32A32_TYPELESS, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R32G32B32A32_FLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32B32A32_UINT, 16, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32G32B32A32_SINT, 16, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_R32G32B32_TYPELESS, VK_FORMAT_R32G32B32_SFLOAT, 12, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R32G32B32_FLOAT, VK_FORMAT_R32G32B32_SFLOAT, 12, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32_UINT, 12, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R32G32B32_SINT, VK_FORMAT_R32G32B32_SINT, 12, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_R16G16B16A16_TYPELESS, VK_FORMAT_R16G16B16A16_SFLOAT, 8, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R16G16B16A16_FLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, 8, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R16G16B16A16_UNORM, 8, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R16G16B16A16_UINT, VK_FORMAT_R16G16B16A16_UINT, 8, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_SNORM, 8, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SINT, 8, 1, 1, 1, COLOR, 1, 
SINT}, + {DXGI_FORMAT_R32G32_TYPELESS, VK_FORMAT_R32G32_SFLOAT, 8, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R32G32_FLOAT, VK_FORMAT_R32G32_SFLOAT, 8, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R32G32_UINT, VK_FORMAT_R32G32_UINT, 8, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R32G32_SINT, VK_FORMAT_R32G32_SINT, 8, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_R10G10B10A2_TYPELESS, VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R10G10B10A2_UNORM, VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R10G10B10A2_UINT, VK_FORMAT_A2B10G10R10_UINT_PACK32, 4, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R11G11B10_FLOAT, VK_FORMAT_B10G11R11_UFLOAT_PACK32, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R8G8_TYPELESS, VK_FORMAT_R8G8_UNORM, 2, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8_UNORM, 2, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R8G8_UINT, VK_FORMAT_R8G8_UINT, 2, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8_SNORM, 2, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R8G8_SINT, VK_FORMAT_R8G8_SINT, 2, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_R8G8B8A8_TYPELESS, VK_FORMAT_R8G8B8A8_UNORM, 4, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R8G8B8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, VK_FORMAT_R8G8B8A8_SRGB, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R8G8B8A8_UINT, VK_FORMAT_R8G8B8A8_UINT, 4, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R8G8B8A8_SNORM, VK_FORMAT_R8G8B8A8_SNORM, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R8G8B8A8_SINT, VK_FORMAT_R8G8B8A8_SINT, 4, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_R16G16_TYPELESS, VK_FORMAT_R16G16_SFLOAT, 4, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R16G16_FLOAT, VK_FORMAT_R16G16_SFLOAT, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16_UNORM, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R16G16_UINT, VK_FORMAT_R16G16_UINT, 4, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16_SNORM, 4, 1, 1, 1, 
COLOR, 1}, + {DXGI_FORMAT_R16G16_SINT, VK_FORMAT_R16G16_SINT, 4, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_R32_TYPELESS, VK_FORMAT_R32_UINT, 4, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_D32_FLOAT, VK_FORMAT_D32_SFLOAT, 4, 1, 1, 1, DEPTH, 1}, + {DXGI_FORMAT_R32_FLOAT, VK_FORMAT_R32_SFLOAT, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, 4, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R32_SINT, VK_FORMAT_R32_SINT, 4, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_R16_TYPELESS, VK_FORMAT_R16_UINT, 2, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R16_FLOAT, VK_FORMAT_R16_SFLOAT, 2, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_D16_UNORM, VK_FORMAT_D16_UNORM, 2, 1, 1, 1, DEPTH, 1}, + {DXGI_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, 2, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R16_UINT, VK_FORMAT_R16_UINT, 2, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R16_SNORM, VK_FORMAT_R16_SNORM, 2, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R16_SINT, VK_FORMAT_R16_SINT, 2, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_R8_TYPELESS, VK_FORMAT_R8_UNORM, 1, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, 1, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R8_UINT, VK_FORMAT_R8_UINT, 1, 1, 1, 1, COLOR, 1, UINT}, + {DXGI_FORMAT_R8_SNORM, VK_FORMAT_R8_SNORM, 1, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R8_SINT, VK_FORMAT_R8_SINT, 1, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_A8_UNORM, VK_FORMAT_R8_UNORM, 1, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B8G8R8X8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B8G8R8A8_TYPELESS, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, VK_FORMAT_B8G8R8A8_SRGB, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B8G8R8X8_TYPELESS, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, VK_FORMAT_B8G8R8A8_SRGB, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_BC1_TYPELESS, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 1, 4, 4, 8, COLOR, 1, 
TYPELESS}, + {DXGI_FORMAT_BC1_UNORM, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 1, 4, 4, 8, COLOR, 1}, + {DXGI_FORMAT_BC1_UNORM_SRGB, VK_FORMAT_BC1_RGBA_SRGB_BLOCK, 1, 4, 4, 8, COLOR, 1}, + {DXGI_FORMAT_BC2_TYPELESS, VK_FORMAT_BC2_UNORM_BLOCK, 1, 4, 4, 16, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_BC2_UNORM, VK_FORMAT_BC2_UNORM_BLOCK, 1, 4, 4, 16, COLOR, 1}, + {DXGI_FORMAT_BC2_UNORM_SRGB, VK_FORMAT_BC2_SRGB_BLOCK, 1, 4, 4, 16, COLOR, 1}, + {DXGI_FORMAT_BC3_TYPELESS, VK_FORMAT_BC3_UNORM_BLOCK, 1, 4, 4, 16, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_BC3_UNORM, VK_FORMAT_BC3_UNORM_BLOCK, 1, 4, 4, 16, COLOR, 1}, + {DXGI_FORMAT_BC3_UNORM_SRGB, VK_FORMAT_BC3_SRGB_BLOCK, 1, 4, 4, 16, COLOR, 1}, + {DXGI_FORMAT_BC4_TYPELESS, VK_FORMAT_BC4_UNORM_BLOCK, 1, 4, 4, 8, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_BC4_UNORM, VK_FORMAT_BC4_UNORM_BLOCK, 1, 4, 4, 8, COLOR, 1}, + {DXGI_FORMAT_BC4_SNORM, VK_FORMAT_BC4_SNORM_BLOCK, 1, 4, 4, 8, COLOR, 1}, + {DXGI_FORMAT_BC5_TYPELESS, VK_FORMAT_BC5_UNORM_BLOCK, 1, 4, 4, 16, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_BC5_UNORM, VK_FORMAT_BC5_UNORM_BLOCK, 1, 4, 4, 16, COLOR, 1}, + {DXGI_FORMAT_BC5_SNORM, VK_FORMAT_BC5_SNORM_BLOCK, 1, 4, 4, 16, COLOR, 1}, + {DXGI_FORMAT_BC6H_TYPELESS, VK_FORMAT_BC6H_UFLOAT_BLOCK, 1, 4, 4, 16, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_BC6H_UF16, VK_FORMAT_BC6H_UFLOAT_BLOCK, 1, 4, 4, 16, COLOR, 1}, + {DXGI_FORMAT_BC6H_SF16, VK_FORMAT_BC6H_SFLOAT_BLOCK, 1, 4, 4, 16, COLOR, 1}, + {DXGI_FORMAT_BC7_TYPELESS, VK_FORMAT_BC7_UNORM_BLOCK, 1, 4, 4, 16, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_BC7_UNORM, VK_FORMAT_BC7_UNORM_BLOCK, 1, 4, 4, 16, COLOR, 1}, + {DXGI_FORMAT_BC7_UNORM_SRGB, VK_FORMAT_BC7_SRGB_BLOCK, 1, 4, 4, 16, COLOR, 1}, +}; + +/* Each depth/stencil format is only compatible with itself in Vulkan. 
*/ +static const struct vkd3d_format vkd3d_depth_stencil_formats[] = +{ + {DXGI_FORMAT_R32G8X24_TYPELESS, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, DEPTH_STENCIL, 2, TYPELESS}, + {DXGI_FORMAT_D32_FLOAT_S8X24_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, DEPTH_STENCIL, 2}, + {DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, DEPTH, 2}, + {DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, STENCIL, 2}, + {DXGI_FORMAT_R32_TYPELESS, VK_FORMAT_D32_SFLOAT, 4, 1, 1, 1, DEPTH, 1, TYPELESS}, + {DXGI_FORMAT_R32_FLOAT, VK_FORMAT_D32_SFLOAT, 4, 1, 1, 1, DEPTH, 1}, + {DXGI_FORMAT_R24G8_TYPELESS, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, DEPTH_STENCIL, 2, TYPELESS}, + {DXGI_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, DEPTH_STENCIL, 2}, + {DXGI_FORMAT_R24_UNORM_X8_TYPELESS, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, DEPTH, 2}, + {DXGI_FORMAT_X24_TYPELESS_G8_UINT, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, STENCIL, 2}, + {DXGI_FORMAT_R16_TYPELESS, VK_FORMAT_D16_UNORM, 2, 1, 1, 1, DEPTH, 1, TYPELESS}, + {DXGI_FORMAT_R16_UNORM, VK_FORMAT_D16_UNORM, 2, 1, 1, 1, DEPTH, 1}, +}; +#undef COLOR +#undef DEPTH +#undef STENCIL +#undef DEPTH_STENCIL +#undef TYPELESS +#undef SINT +#undef UINT + +static const struct vkd3d_format_compatibility_info +{ + DXGI_FORMAT format; + DXGI_FORMAT typeless_format; +} +vkd3d_format_compatibility_info[] = +{ + /* DXGI_FORMAT_R32G32B32A32_TYPELESS */ + {DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_TYPELESS}, + {DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32A32_TYPELESS}, + {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_TYPELESS}, + /* DXGI_FORMAT_R32G32B32_TYPELESS */ + {DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_TYPELESS}, + {DXGI_FORMAT_R32G32B32_SINT, DXGI_FORMAT_R32G32B32_TYPELESS}, + {DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_TYPELESS}, + /* DXGI_FORMAT_R16G16B16A16_TYPELESS */ + {DXGI_FORMAT_R16G16B16A16_UNORM, 
DXGI_FORMAT_R16G16B16A16_TYPELESS}, + {DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_TYPELESS}, + {DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_TYPELESS}, + {DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R16G16B16A16_TYPELESS}, + {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_TYPELESS}, + /* DXGI_FORMAT_R32G32_TYPELESS */ + {DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_TYPELESS}, + {DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_R32G32_TYPELESS}, + {DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_TYPELESS}, + /* DXGI_FORMAT_R32G8X24_TYPELESS */ + {DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_R32G8X24_TYPELESS}, + {DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS}, + {DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS}, + /* DXGI_FORMAT_R10G10B10A2_TYPELESS */ + {DXGI_FORMAT_R10G10B10A2_UINT, DXGI_FORMAT_R10G10B10A2_TYPELESS}, + {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_TYPELESS}, + /* DXGI_FORMAT_R8G8B8A8_TYPELESS */ + {DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_TYPELESS}, + {DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_TYPELESS}, + {DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R8G8B8A8_TYPELESS}, + {DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_TYPELESS}, + {DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_TYPELESS}, + /* DXGI_FORMAT_R16G16_TYPELESS */ + {DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_TYPELESS}, + {DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16_TYPELESS}, + {DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_TYPELESS}, + {DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16_TYPELESS}, + {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_TYPELESS}, + /* DXGI_FORMAT_R32_TYPELESS */ + {DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_TYPELESS}, + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_TYPELESS}, + {DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_TYPELESS}, + {DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R32_TYPELESS}, + /* DXGI_FORMAT_R24G8_TYPELESS */ + {DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_R24G8_TYPELESS}, + 
{DXGI_FORMAT_X24_TYPELESS_G8_UINT, DXGI_FORMAT_R24G8_TYPELESS}, + {DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_R24G8_TYPELESS}, + /* DXGI_FORMAT_R8G8_TYPELESS */ + {DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8_TYPELESS}, + {DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_TYPELESS}, + {DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_TYPELESS}, + {DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8_TYPELESS}, + /* DXGI_FORMAT_R16_TYPELESS */ + {DXGI_FORMAT_D16_UNORM, DXGI_FORMAT_R16_TYPELESS}, + {DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_TYPELESS}, + {DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16_TYPELESS}, + {DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_TYPELESS}, + {DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_TYPELESS}, + {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_TYPELESS}, + /* DXGI_FORMAT_R8_TYPELESS */ + {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_TYPELESS}, + {DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8_TYPELESS}, + {DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_TYPELESS}, + {DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_TYPELESS}, + /* DXGI_FORMAT_BC1_TYPELESS */ + {DXGI_FORMAT_BC1_UNORM_SRGB, DXGI_FORMAT_BC1_TYPELESS}, + {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_TYPELESS}, + /* DXGI_FORMAT_BC2_TYPELESS */ + {DXGI_FORMAT_BC2_UNORM_SRGB, DXGI_FORMAT_BC2_TYPELESS}, + {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_TYPELESS}, + /* DXGI_FORMAT_BC3_TYPELESS */ + {DXGI_FORMAT_BC3_UNORM_SRGB, DXGI_FORMAT_BC3_TYPELESS}, + {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_TYPELESS}, + /* DXGI_FORMAT_BC4_TYPELESS */ + {DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_TYPELESS}, + {DXGI_FORMAT_BC4_SNORM, DXGI_FORMAT_BC4_TYPELESS}, + /* DXGI_FORMAT_BC5_TYPELESS */ + {DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_TYPELESS}, + {DXGI_FORMAT_BC5_SNORM, DXGI_FORMAT_BC5_TYPELESS}, + /* DXGI_FORMAT_BC6H_TYPELESS */ + {DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_TYPELESS}, + {DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_BC6H_TYPELESS}, + /* DXGI_FORMAT_BC7_TYPELESS */ + {DXGI_FORMAT_BC7_UNORM_SRGB, DXGI_FORMAT_BC7_TYPELESS}, + {DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_TYPELESS}, + /* DXGI_FORMAT_B8G8R8A8_TYPELESS */ + 
{DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, DXGI_FORMAT_B8G8R8A8_TYPELESS}, + {DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8A8_TYPELESS}, + /* DXGI_FORMAT_B8G8R8X8_TYPELESS */ + {DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, DXGI_FORMAT_B8G8R8X8_TYPELESS}, + {DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_B8G8R8X8_TYPELESS}, +}; + +static bool dxgi_format_is_depth_stencil(DXGI_FORMAT dxgi_format) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(vkd3d_formats); ++i) + { + const struct vkd3d_format *current = &vkd3d_formats[i]; + + if (current->dxgi_format == dxgi_format) + return current->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + } + + for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i) + { + if (vkd3d_depth_stencil_formats[i].dxgi_format == dxgi_format) + return true; + } + + return false; +} + +/* FIXME: This table should be generated at compile-time. */ +static HRESULT vkd3d_init_format_compatibility_lists(struct d3d12_device *device) +{ + struct vkd3d_format_compatibility_list *lists, *current_list; + const struct vkd3d_format_compatibility_info *current; + DXGI_FORMAT dxgi_format; + VkFormat vk_format; + unsigned int count; + unsigned int i, j; + + device->format_compatibility_list_count = 0; + device->format_compatibility_lists = NULL; + + if (!device->vk_info.KHR_image_format_list) + return S_OK; + + count = 1; + dxgi_format = vkd3d_format_compatibility_info[0].typeless_format; + for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i) + { + DXGI_FORMAT typeless_format = vkd3d_format_compatibility_info[i].typeless_format; + + if (dxgi_format != typeless_format) + { + ++count; + dxgi_format = typeless_format; + } + } + + if (!(lists = vkd3d_calloc(count, sizeof(*lists)))) + return E_OUTOFMEMORY; + + count = 0; + current_list = lists; + current_list->typeless_format = vkd3d_format_compatibility_info[0].typeless_format; + for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i) + { + current = 
&vkd3d_format_compatibility_info[i]; + + if (current_list->typeless_format != current->typeless_format) + { + /* Avoid empty format lists. */ + if (current_list->format_count) + { + ++current_list; + ++count; + } + + current_list->typeless_format = current->typeless_format; + } + + /* In Vulkan, each depth-stencil format is only compatible with itself. */ + if (dxgi_format_is_depth_stencil(current->format)) + continue; + + if (!(vk_format = vkd3d_get_vk_format(current->format))) + continue; + + for (j = 0; j < current_list->format_count; ++j) + { + if (current_list->vk_formats[j] == vk_format) + break; + } + + if (j >= current_list->format_count) + { + assert(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT); + current_list->vk_formats[current_list->format_count++] = vk_format; + } + } + if (current_list->format_count) + ++count; + + device->format_compatibility_list_count = count; + device->format_compatibility_lists = lists; + return S_OK; +} + +static void vkd3d_cleanup_format_compatibility_lists(struct d3d12_device *device) +{ + vkd3d_free((void *)device->format_compatibility_lists); + + device->format_compatibility_lists = NULL; + device->format_compatibility_list_count = 0; +} + +static HRESULT vkd3d_init_depth_stencil_formats(struct d3d12_device *device) +{ + const unsigned int count = ARRAY_SIZE(vkd3d_depth_stencil_formats); + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkFormatProperties properties; + struct vkd3d_format *formats; + unsigned int i; + + VK_CALL(vkGetPhysicalDeviceFormatProperties(device->vk_physical_device, + VK_FORMAT_D24_UNORM_S8_UINT, &properties)); + + if (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) + { + device->depth_stencil_formats = vkd3d_depth_stencil_formats; + } + else + { + /* AMD doesn't support VK_FORMAT_D24_UNORM_S8_UINT. 
*/ + WARN("Mapping VK_FORMAT_D24_UNORM_S8_UINT to VK_FORMAT_D32_SFLOAT_S8_UINT.\n"); + + if (!(formats = vkd3d_calloc(count, sizeof(*formats)))) + return E_OUTOFMEMORY; + + memcpy(formats, vkd3d_depth_stencil_formats, sizeof(vkd3d_depth_stencil_formats)); + for (i = 0; i < count; ++i) + { + if (formats[i].vk_format == VK_FORMAT_D24_UNORM_S8_UINT) + { + formats[i].vk_format = VK_FORMAT_D32_SFLOAT_S8_UINT; + formats[i].is_emulated = true; + } + } + + device->depth_stencil_formats = formats; + } + + return S_OK; +} + +static void vkd3d_cleanup_depth_stencil_formats(struct d3d12_device *device) +{ + if (vkd3d_depth_stencil_formats != device->depth_stencil_formats) + vkd3d_free((void *)device->depth_stencil_formats); + + device->depth_stencil_formats = NULL; +} + +HRESULT vkd3d_init_format_info(struct d3d12_device *device) +{ + HRESULT hr; + + if (FAILED(hr = vkd3d_init_depth_stencil_formats(device))) + return hr; + + if (FAILED(hr = vkd3d_init_format_compatibility_lists(device))) + vkd3d_cleanup_depth_stencil_formats(device); + + return hr; +} + +void vkd3d_cleanup_format_info(struct d3d12_device *device) +{ + vkd3d_cleanup_depth_stencil_formats(device); + vkd3d_cleanup_format_compatibility_lists(device); +} + +/* We use overrides for depth/stencil formats. This is required in order to + * properly support typeless formats because depth/stencil formats are only + * compatible with themselves in Vulkan. 
+ */ +static const struct vkd3d_format *vkd3d_get_depth_stencil_format(const struct d3d12_device *device, + DXGI_FORMAT dxgi_format) +{ + const struct vkd3d_format *formats; + unsigned int i; + + assert(device); + formats = device->depth_stencil_formats; + + for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i) + { + if (formats[i].dxgi_format == dxgi_format) + return &formats[i]; + } + + return NULL; +} + +const struct vkd3d_format *vkd3d_get_format(const struct d3d12_device *device, + DXGI_FORMAT dxgi_format, bool depth_stencil) +{ + const struct vkd3d_format *format; + unsigned int i; + + if (depth_stencil && (format = vkd3d_get_depth_stencil_format(device, dxgi_format))) + return format; + + for (i = 0; i < ARRAY_SIZE(vkd3d_formats); ++i) + { + if (vkd3d_formats[i].dxgi_format == dxgi_format) + return &vkd3d_formats[i]; + } + + return NULL; +} + +const struct vkd3d_format *vkd3d_find_uint_format(const struct d3d12_device *device, DXGI_FORMAT dxgi_format) +{ + DXGI_FORMAT typeless_format = DXGI_FORMAT_UNKNOWN; + const struct vkd3d_format *vkd3d_format; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i) + { + if (vkd3d_format_compatibility_info[i].format == dxgi_format) + { + typeless_format = vkd3d_format_compatibility_info[i].typeless_format; + break; + } + } + + if (!typeless_format) + return NULL; + + for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i) + { + if (vkd3d_format_compatibility_info[i].typeless_format != typeless_format) + continue; + + vkd3d_format = vkd3d_get_format(device, vkd3d_format_compatibility_info[i].format, false); + if (vkd3d_format->type == VKD3D_FORMAT_TYPE_UINT) + return vkd3d_format; + } + + return NULL; +} + +void vkd3d_format_copy_data(const struct vkd3d_format *format, const uint8_t *src, + unsigned int src_row_pitch, unsigned int src_slice_pitch, uint8_t *dst, unsigned int dst_row_pitch, + unsigned int dst_slice_pitch, unsigned int w, unsigned int h, unsigned int d) +{ + 
unsigned int row_block_count, row_count, row_size, slice, row; + unsigned int slice_count = d; + const uint8_t *src_row; + uint8_t *dst_row; + + row_block_count = (w + format->block_width - 1) / format->block_width; + row_count = (h + format->block_height - 1) / format->block_height; + row_size = row_block_count * format->byte_count * format->block_byte_count; + + for (slice = 0; slice < slice_count; ++slice) + { + for (row = 0; row < row_count; ++row) + { + src_row = &src[slice * src_slice_pitch + row * src_row_pitch]; + dst_row = &dst[slice * dst_slice_pitch + row * dst_row_pitch]; + memcpy(dst_row, src_row, row_size); + } + } +} + +VkFormat vkd3d_get_vk_format(DXGI_FORMAT format) +{ + const struct vkd3d_format *vkd3d_format; + + if (!(vkd3d_format = vkd3d_get_format(NULL, format, false))) + return VK_FORMAT_UNDEFINED; + + return vkd3d_format->vk_format; +} + +DXGI_FORMAT vkd3d_get_dxgi_format(VkFormat format) +{ + DXGI_FORMAT dxgi_format; + VkFormat vk_format; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(vkd3d_formats); ++i) + { + vk_format = vkd3d_formats[i].vk_format; + dxgi_format = vkd3d_formats[i].dxgi_format; + if (vk_format == format && vkd3d_formats[i].type != VKD3D_FORMAT_TYPE_TYPELESS) + return dxgi_format; + } + + FIXME("Unhandled Vulkan format %#x.\n", format); + return DXGI_FORMAT_UNKNOWN; +} + +bool is_valid_feature_level(D3D_FEATURE_LEVEL feature_level) +{ + static const D3D_FEATURE_LEVEL valid_feature_levels[] = + { + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + D3D_FEATURE_LEVEL_9_3, + D3D_FEATURE_LEVEL_9_2, + D3D_FEATURE_LEVEL_9_1, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(valid_feature_levels); ++i) + { + if (valid_feature_levels[i] == feature_level) + return true; + } + + return false; +} + +bool is_write_resource_state(D3D12_RESOURCE_STATES state) +{ + return state & (D3D12_RESOURCE_STATE_RENDER_TARGET + | 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS + | D3D12_RESOURCE_STATE_DEPTH_WRITE + | D3D12_RESOURCE_STATE_STREAM_OUT + | D3D12_RESOURCE_STATE_COPY_DEST + | D3D12_RESOURCE_STATE_RESOLVE_DEST); +} + +static bool is_power_of_two(unsigned int x) +{ + return x && !(x & (x -1)); +} + +bool is_valid_resource_state(D3D12_RESOURCE_STATES state) +{ + const D3D12_RESOURCE_STATES valid_states = + D3D12_RESOURCE_STATE_COMMON | + D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER | + D3D12_RESOURCE_STATE_INDEX_BUFFER | + D3D12_RESOURCE_STATE_RENDER_TARGET | + D3D12_RESOURCE_STATE_UNORDERED_ACCESS | + D3D12_RESOURCE_STATE_DEPTH_WRITE | + D3D12_RESOURCE_STATE_DEPTH_READ | + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | + D3D12_RESOURCE_STATE_STREAM_OUT | + D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT | + D3D12_RESOURCE_STATE_COPY_DEST | + D3D12_RESOURCE_STATE_COPY_SOURCE | + D3D12_RESOURCE_STATE_RESOLVE_DEST | + D3D12_RESOURCE_STATE_RESOLVE_SOURCE | + D3D12_RESOURCE_STATE_GENERIC_READ | + D3D12_RESOURCE_STATE_PRESENT | + D3D12_RESOURCE_STATE_PREDICATION; + + if (state & ~valid_states) + { + WARN("Invalid resource states %#x.\n", state & ~valid_states); + return false; + } + + /* Exactly one bit must be set for write states. 
*/ + if (is_write_resource_state(state) && !is_power_of_two(state)) + { + WARN("Write state cannot be mixed with other states: %#x.\n", state); + return false; + } + + return true; +} + +HRESULT return_interface(void *iface, REFIID iface_iid, + REFIID requested_iid, void **object) +{ + IUnknown *unknown = iface; + HRESULT hr; + + if (IsEqualGUID(iface_iid, requested_iid)) + { + *object = unknown; + return S_OK; + } + + hr = IUnknown_QueryInterface(unknown, requested_iid, object); + IUnknown_Release(unknown); + return hr; +} + +const char *debug_d3d12_box(const D3D12_BOX *box) +{ + if (!box) + return "(null)"; + + return vkd3d_dbg_sprintf("(%u, %u, %u)-(%u, %u, %u)", + box->left, box->top, box->front, + box->right, box->bottom, box->back); +} + +static const char *debug_d3d12_shader_component(D3D12_SHADER_COMPONENT_MAPPING component) +{ + switch (component) + { + case D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0: + return "r"; + case D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1: + return "g"; + case D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2: + return "b"; + case D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3: + return "a"; + case D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0: + return "0"; + case D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1: + return "1"; + } + + FIXME("Invalid component mapping %#x.\n", component); + return "invalid"; +} + +const char *debug_d3d12_shader_component_mapping(unsigned int mapping) +{ + return vkd3d_dbg_sprintf("{%s, %s, %s, %s}", + debug_d3d12_shader_component(D3D12_DECODE_SHADER_4_COMPONENT_MAPPING(0, mapping)), + debug_d3d12_shader_component(D3D12_DECODE_SHADER_4_COMPONENT_MAPPING(1, mapping)), + debug_d3d12_shader_component(D3D12_DECODE_SHADER_4_COMPONENT_MAPPING(2, mapping)), + debug_d3d12_shader_component(D3D12_DECODE_SHADER_4_COMPONENT_MAPPING(3, mapping))); +} + +const char *debug_vk_extent_3d(VkExtent3D extent) +{ + return vkd3d_dbg_sprintf("(%u, %u, %u)", + (unsigned int)extent.width, + 
(unsigned int)extent.height, + (unsigned int)extent.depth); +} + +const char *debug_vk_queue_flags(VkQueueFlags flags) +{ + char buffer[120]; + + buffer[0] = '\0'; +#define FLAG_TO_STR(f) if (flags & f) { strcat(buffer, " | "#f); flags &= ~f; } + FLAG_TO_STR(VK_QUEUE_GRAPHICS_BIT) + FLAG_TO_STR(VK_QUEUE_COMPUTE_BIT) + FLAG_TO_STR(VK_QUEUE_TRANSFER_BIT) + FLAG_TO_STR(VK_QUEUE_SPARSE_BINDING_BIT) +#undef FLAG_TO_STR + if (flags) + FIXME("Unrecognized flag(s) %#x.\n", flags); + + if (!buffer[0]) + return "0"; + return vkd3d_dbg_sprintf("%s", &buffer[3]); +} + +const char *debug_vk_memory_heap_flags(VkMemoryHeapFlags flags) +{ + char buffer[80]; + + buffer[0] = '\0'; +#define FLAG_TO_STR(f) if (flags & f) { strcat(buffer, " | "#f); flags &= ~f; } + FLAG_TO_STR(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + FLAG_TO_STR(VK_MEMORY_HEAP_MULTI_INSTANCE_BIT) +#undef FLAG_TO_STR + if (flags) + FIXME("Unrecognized flag(s) %#x.\n", flags); + + if (!buffer[0]) + return "0"; + return vkd3d_dbg_sprintf("%s", &buffer[3]); +} + +const char *debug_vk_memory_property_flags(VkMemoryPropertyFlags flags) +{ + char buffer[320]; + + buffer[0] = '\0'; +#define FLAG_TO_STR(f) if (flags & f) { strcat(buffer, " | "#f); flags &= ~f; } + FLAG_TO_STR(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + FLAG_TO_STR(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + FLAG_TO_STR(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + FLAG_TO_STR(VK_MEMORY_PROPERTY_HOST_CACHED_BIT) + FLAG_TO_STR(VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) + FLAG_TO_STR(VK_MEMORY_PROPERTY_PROTECTED_BIT) +#undef FLAG_TO_STR +#define FLAG_TO_STR(f, n) if (flags & f) { strcat(buffer, " | "#n); flags &= ~f; } + FLAG_TO_STR(0x40, VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD) + FLAG_TO_STR(0x80, VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD) +#undef FLAG_TO_STR + if (flags) + FIXME("Unrecognized flag(s) %#x.\n", flags); + + if (!buffer[0]) + return "0"; + return vkd3d_dbg_sprintf("%s", &buffer[3]); +} + +HRESULT hresult_from_errno(int rc) +{ + switch (rc) + { + case 0: + return S_OK; 
+ case ENOMEM: + return E_OUTOFMEMORY; + case EINVAL: + return E_INVALIDARG; + default: + FIXME("Unhandled errno %d.\n", rc); + return E_FAIL; + } +} + +HRESULT hresult_from_vk_result(VkResult vr) +{ + switch (vr) + { + case VK_SUCCESS: + return S_OK; + case VK_ERROR_OUT_OF_DEVICE_MEMORY: + WARN("Out of device memory.\n"); + /* fall-through */ + case VK_ERROR_OUT_OF_HOST_MEMORY: + return E_OUTOFMEMORY; + default: + FIXME("Unhandled VkResult %d.\n", vr); + /* fall-through */ + case VK_ERROR_DEVICE_LOST: + case VK_ERROR_EXTENSION_NOT_PRESENT: + return E_FAIL; + } +} + +#define LOAD_GLOBAL_PFN(name) \ + if (!(procs->name = (void *)vkGetInstanceProcAddr(NULL, #name))) \ + { \ + ERR("Could not get global proc addr for '" #name "'.\n"); \ + return E_FAIL; \ + } + +HRESULT vkd3d_load_vk_global_procs(struct vkd3d_vk_global_procs *procs, + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) +{ + memset(procs, 0, sizeof(*procs)); + + procs->vkGetInstanceProcAddr = vkGetInstanceProcAddr; + + LOAD_GLOBAL_PFN(vkCreateInstance) + LOAD_GLOBAL_PFN(vkEnumerateInstanceExtensionProperties) + + TRACE("Loaded global Vulkan procs.\n"); + return S_OK; +} + +#define LOAD_INSTANCE_PFN(name) \ + if (!(procs->name = (void *)global_procs->vkGetInstanceProcAddr(instance, #name))) \ + { \ + ERR("Could not get instance proc addr for '" #name "'.\n"); \ + return E_FAIL; \ + } +#define LOAD_INSTANCE_OPTIONAL_PFN(name) \ + procs->name = (void *)global_procs->vkGetInstanceProcAddr(instance, #name); + +HRESULT vkd3d_load_vk_instance_procs(struct vkd3d_vk_instance_procs *procs, + const struct vkd3d_vk_global_procs *global_procs, VkInstance instance) +{ + memset(procs, 0, sizeof(*procs)); + +#define VK_INSTANCE_PFN LOAD_INSTANCE_PFN +#define VK_INSTANCE_EXT_PFN LOAD_INSTANCE_OPTIONAL_PFN +#include "vulkan_procs.h" + + TRACE("Loaded procs for VkInstance %p.\n", instance); + return S_OK; +} + +#define COPY_PARENT_PFN(name) procs->name = parent_procs->name; +#define LOAD_DEVICE_PFN(name) \ + if 
(!(procs->name = (void *)procs->vkGetDeviceProcAddr(device, #name))) \ + { \ + ERR("Could not get device proc addr for '" #name "'.\n"); \ + return E_FAIL; \ + } +#define LOAD_DEVICE_OPTIONAL_PFN(name) \ + procs->name = (void *)procs->vkGetDeviceProcAddr(device, #name); + +HRESULT vkd3d_load_vk_device_procs(struct vkd3d_vk_device_procs *procs, + const struct vkd3d_vk_instance_procs *parent_procs, VkDevice device) +{ + memset(procs, 0, sizeof(*procs)); + +#define VK_INSTANCE_PFN COPY_PARENT_PFN +#define VK_DEVICE_PFN LOAD_DEVICE_PFN +#define VK_DEVICE_EXT_PFN LOAD_DEVICE_OPTIONAL_PFN +#include "vulkan_procs.h" + + TRACE("Loaded procs for VkDevice %p.\n", device); + return S_OK; +} + +#if HAVE_DECL_PROGRAM_INVOCATION_NAME + +bool vkd3d_get_program_name(char program_name[PATH_MAX]) +{ + char *name, *p, *real_path = NULL; + + if ((name = strrchr(program_invocation_name, '/'))) + { + real_path = realpath("/proc/self/exe", NULL); + + /* Try to strip command line arguments. */ + if (real_path && (p = strrchr(real_path, '/')) + && !strncmp(real_path, program_invocation_name, strlen(real_path))) + { + name = p; + } + + ++name; + } + else if ((name = strrchr(program_invocation_name, '\\'))) + { + ++name; + } + else + { + name = program_invocation_name; + } + + strncpy(program_name, name, PATH_MAX); + program_name[PATH_MAX - 1] = '\0'; + free(real_path); + return true; +} + +#else + +bool vkd3d_get_program_name(char program_name[PATH_MAX]) +{ + *program_name = '\0'; + return false; +} + +#endif /* HAVE_DECL_PROGRAM_INVOCATION_NAME */ + +static struct vkd3d_private_data *vkd3d_private_store_get_private_data( + const struct vkd3d_private_store *store, const GUID *tag) +{ + struct vkd3d_private_data *data; + + LIST_FOR_EACH_ENTRY(data, &store->content, struct vkd3d_private_data, entry) + { + if (IsEqualGUID(&data->tag, tag)) + return data; + } + + return NULL; +} + +static HRESULT vkd3d_private_store_set_private_data(struct vkd3d_private_store *store, + const GUID *tag, const 
void *data, unsigned int data_size, bool is_object) +{ + struct vkd3d_private_data *d, *old_data; + const void *ptr = data; + + if (!data) + { + if ((d = vkd3d_private_store_get_private_data(store, tag))) + { + vkd3d_private_data_destroy(d); + return S_OK; + } + + return S_FALSE; + } + + if (is_object) + { + if (data_size != sizeof(IUnknown *)) + return E_INVALIDARG; + ptr = &data; + } + + if (!(d = vkd3d_malloc(offsetof(struct vkd3d_private_data, u.data[data_size])))) + return E_OUTOFMEMORY; + + d->tag = *tag; + d->size = data_size; + d->is_object = is_object; + memcpy(d->u.data, ptr, data_size); + if (is_object) + IUnknown_AddRef(d->u.object); + + if ((old_data = vkd3d_private_store_get_private_data(store, tag))) + vkd3d_private_data_destroy(old_data); + list_add_tail(&store->content, &d->entry); + + return S_OK; +} + +HRESULT vkd3d_get_private_data(struct vkd3d_private_store *store, + const GUID *tag, unsigned int *out_size, void *out) +{ + const struct vkd3d_private_data *data; + HRESULT hr = S_OK; + unsigned int size; + int rc; + + if (!out_size) + return E_INVALIDARG; + + if ((rc = vkd3d_mutex_lock(&store->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if (!(data = vkd3d_private_store_get_private_data(store, tag))) + { + *out_size = 0; + hr = DXGI_ERROR_NOT_FOUND; + goto done; + } + + size = *out_size; + *out_size = data->size; + if (!out) + goto done; + + if (size < data->size) + { + hr = DXGI_ERROR_MORE_DATA; + goto done; + } + + if (data->is_object) + IUnknown_AddRef(data->u.object); + memcpy(out, data->u.data, data->size); + +done: + vkd3d_mutex_unlock(&store->mutex); + return hr; +} + +HRESULT vkd3d_set_private_data(struct vkd3d_private_store *store, + const GUID *tag, unsigned int data_size, const void *data) +{ + HRESULT hr; + int rc; + + if ((rc = vkd3d_mutex_lock(&store->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + hr = 
vkd3d_private_store_set_private_data(store, tag, data, data_size, false); + + vkd3d_mutex_unlock(&store->mutex); + return hr; +} + +HRESULT vkd3d_set_private_data_interface(struct vkd3d_private_store *store, + const GUID *tag, const IUnknown *object) +{ + const void *data = object ? object : (void *)&object; + HRESULT hr; + int rc; + + if ((rc = vkd3d_mutex_lock(&store->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + hr = vkd3d_private_store_set_private_data(store, tag, data, sizeof(object), !!object); + + vkd3d_mutex_unlock(&store->mutex); + return hr; +} + +VkResult vkd3d_set_vk_object_name_utf8(struct d3d12_device *device, uint64_t vk_object, + VkDebugReportObjectTypeEXT vk_object_type, const char *name) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDebugMarkerObjectNameInfoEXT info; + + if (!device->vk_info.EXT_debug_marker) + return VK_SUCCESS; + + info.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT; + info.pNext = NULL; + info.objectType = vk_object_type; + info.object = vk_object; + info.pObjectName = name; + return VK_CALL(vkDebugMarkerSetObjectNameEXT(device->vk_device, &info)); +} + +HRESULT vkd3d_set_vk_object_name(struct d3d12_device *device, uint64_t vk_object, + VkDebugReportObjectTypeEXT vk_object_type, const WCHAR *name) +{ + char *name_utf8; + VkResult vr; + + if (!name) + return E_INVALIDARG; + + if (!device->vk_info.EXT_debug_marker) + return S_OK; + + if (!(name_utf8 = vkd3d_strdup_w_utf8(name, device->wchar_size))) + return E_OUTOFMEMORY; + + vr = vkd3d_set_vk_object_name_utf8(device, vk_object, vk_object_type, name_utf8); + + vkd3d_free(name_utf8); + + return hresult_from_vk_result(vr); +} diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c new file mode 100644 index 00000000000..21d998bf94a --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c @@ -0,0 +1,512 @@ +/* + * Copyright 2016-2017 Józef Kucia for 
CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_private.h" + +VKD3D_DEBUG_ENV_NAME("VKD3D_DEBUG"); + +HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, + REFIID iid, void **device) +{ + struct vkd3d_instance *instance; + struct d3d12_device *object; + HRESULT hr; + + TRACE("create_info %p, iid %s, device %p.\n", create_info, debugstr_guid(iid), device); + + if (!create_info) + return E_INVALIDARG; + if (create_info->type != VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO) + { + WARN("Invalid structure type %#x.\n", create_info->type); + return E_INVALIDARG; + } + if (!create_info->instance && !create_info->instance_create_info) + { + ERR("Instance or instance create info is required.\n"); + return E_INVALIDARG; + } + if (create_info->instance && create_info->instance_create_info) + { + ERR("Instance and instance create info are mutually exclusive parameters.\n"); + return E_INVALIDARG; + } + + if (create_info->minimum_feature_level < D3D_FEATURE_LEVEL_11_0 + || !is_valid_feature_level(create_info->minimum_feature_level)) + { + WARN("Invalid feature level %#x.\n", create_info->minimum_feature_level); + return E_INVALIDARG; + } + + if ((instance = create_info->instance)) + { + vkd3d_instance_incref(instance); + } + else 
if (FAILED(hr = vkd3d_create_instance(create_info->instance_create_info, &instance))) + { + WARN("Failed to create instance, hr %#x.\n", hr); + return E_FAIL; + } + + hr = d3d12_device_create(instance, create_info, &object); + vkd3d_instance_decref(instance); + if (FAILED(hr)) + return hr; + + if (!device) + { + ID3D12Device_Release(&object->ID3D12Device_iface); + return S_FALSE; + } + + return return_interface(&object->ID3D12Device_iface, &IID_ID3D12Device, iid, device); +} + +/* ID3D12RootSignatureDeserializer */ +struct d3d12_root_signature_deserializer +{ + ID3D12RootSignatureDeserializer ID3D12RootSignatureDeserializer_iface; + LONG refcount; + + union + { + D3D12_VERSIONED_ROOT_SIGNATURE_DESC d3d12; + struct vkd3d_shader_versioned_root_signature_desc vkd3d; + } desc; +}; + +STATIC_ASSERT(sizeof(D3D12_ROOT_SIGNATURE_DESC) == sizeof(struct vkd3d_shader_root_signature_desc)); + +static struct d3d12_root_signature_deserializer *impl_from_ID3D12RootSignatureDeserializer( + ID3D12RootSignatureDeserializer *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_root_signature_deserializer, ID3D12RootSignatureDeserializer_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_root_signature_deserializer_QueryInterface( + ID3D12RootSignatureDeserializer *iface, REFIID riid, void **object) +{ + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + + /* QueryInterface() implementation is broken, E_NOINTERFACE is returned for + * IUnknown. 
+ */ + if (IsEqualGUID(riid, &IID_ID3D12RootSignatureDeserializer)) + { + ID3D12RootSignatureDeserializer_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_root_signature_deserializer_AddRef(ID3D12RootSignatureDeserializer *iface) +{ + struct d3d12_root_signature_deserializer *deserializer = impl_from_ID3D12RootSignatureDeserializer(iface); + ULONG refcount = InterlockedIncrement(&deserializer->refcount); + + TRACE("%p increasing refcount to %u.\n", deserializer, refcount); + + return refcount; +} + +static ULONG STDMETHODCALLTYPE d3d12_root_signature_deserializer_Release(ID3D12RootSignatureDeserializer *iface) +{ + struct d3d12_root_signature_deserializer *deserializer = impl_from_ID3D12RootSignatureDeserializer(iface); + ULONG refcount = InterlockedDecrement(&deserializer->refcount); + + TRACE("%p decreasing refcount to %u.\n", deserializer, refcount); + + if (!refcount) + { + vkd3d_shader_free_root_signature(&deserializer->desc.vkd3d); + vkd3d_free(deserializer); + } + + return refcount; +} + +static const D3D12_ROOT_SIGNATURE_DESC * STDMETHODCALLTYPE d3d12_root_signature_deserializer_GetRootSignatureDesc( + ID3D12RootSignatureDeserializer *iface) +{ + struct d3d12_root_signature_deserializer *deserializer = impl_from_ID3D12RootSignatureDeserializer(iface); + + TRACE("iface %p.\n", iface); + + assert(deserializer->desc.d3d12.Version == D3D_ROOT_SIGNATURE_VERSION_1_0); + return &deserializer->desc.d3d12.u.Desc_1_0; +} + +static const struct ID3D12RootSignatureDeserializerVtbl d3d12_root_signature_deserializer_vtbl = +{ + /* IUnknown methods */ + d3d12_root_signature_deserializer_QueryInterface, + d3d12_root_signature_deserializer_AddRef, + d3d12_root_signature_deserializer_Release, + /* ID3D12RootSignatureDeserializer methods */ + d3d12_root_signature_deserializer_GetRootSignatureDesc, +}; + 
+int vkd3d_parse_root_signature_v_1_0(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_versioned_root_signature_desc *out_desc) +{ + struct vkd3d_shader_versioned_root_signature_desc desc, converted_desc; + int ret; + + if ((ret = vkd3d_shader_parse_root_signature(dxbc, &desc, NULL)) < 0) + { + WARN("Failed to parse root signature, vkd3d result %d.\n", ret); + return ret; + } + + if (desc.version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) + { + *out_desc = desc; + } + else + { + enum vkd3d_shader_root_signature_version version = desc.version; + + ret = vkd3d_shader_convert_root_signature(&converted_desc, VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0, &desc); + vkd3d_shader_free_root_signature(&desc); + if (ret < 0) + { + WARN("Failed to convert from version %#x, vkd3d result %d.\n", version, ret); + return ret; + } + + *out_desc = converted_desc; + } + + return ret; +} + +static HRESULT d3d12_root_signature_deserializer_init(struct d3d12_root_signature_deserializer *deserializer, + const struct vkd3d_shader_code *dxbc) +{ + int ret; + + deserializer->ID3D12RootSignatureDeserializer_iface.lpVtbl = &d3d12_root_signature_deserializer_vtbl; + deserializer->refcount = 1; + + if ((ret = vkd3d_parse_root_signature_v_1_0(dxbc, &deserializer->desc.vkd3d)) < 0) + return hresult_from_vkd3d_result(ret); + + return S_OK; +} + +HRESULT vkd3d_create_root_signature_deserializer(const void *data, SIZE_T data_size, + REFIID iid, void **deserializer) +{ + struct vkd3d_shader_code dxbc = {data, data_size}; + struct d3d12_root_signature_deserializer *object; + HRESULT hr; + + TRACE("data %p, data_size %lu, iid %s, deserializer %p.\n", + data, data_size, debugstr_guid(iid), deserializer); + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_root_signature_deserializer_init(object, &dxbc))) + { + vkd3d_free(object); + return hr; + } + + return return_interface(&object->ID3D12RootSignatureDeserializer_iface, + 
&IID_ID3D12RootSignatureDeserializer, iid, deserializer); +} + +/* ID3D12VersionedRootSignatureDeserializer */ +struct d3d12_versioned_root_signature_deserializer +{ + ID3D12VersionedRootSignatureDeserializer ID3D12VersionedRootSignatureDeserializer_iface; + LONG refcount; + + union + { + D3D12_VERSIONED_ROOT_SIGNATURE_DESC d3d12; + struct vkd3d_shader_versioned_root_signature_desc vkd3d; + } desc, other_desc; +}; + +STATIC_ASSERT(sizeof(D3D12_VERSIONED_ROOT_SIGNATURE_DESC) == sizeof(struct vkd3d_shader_versioned_root_signature_desc)); + +static struct d3d12_versioned_root_signature_deserializer *impl_from_ID3D12VersionedRootSignatureDeserializer( + ID3D12VersionedRootSignatureDeserializer *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_versioned_root_signature_deserializer, + ID3D12VersionedRootSignatureDeserializer_iface); +} + +static HRESULT STDMETHODCALLTYPE d3d12_versioned_root_signature_deserializer_QueryInterface( + ID3D12VersionedRootSignatureDeserializer *iface, REFIID iid, void **object) +{ + TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); + + /* QueryInterface() implementation is broken, E_NOINTERFACE is returned for + * IUnknown. 
+ */ + if (IsEqualGUID(iid, &IID_ID3D12VersionedRootSignatureDeserializer)) + { + ID3D12VersionedRootSignatureDeserializer_AddRef(iface); + *object = iface; + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); + + *object = NULL; + return E_NOINTERFACE; +} + +static ULONG STDMETHODCALLTYPE d3d12_versioned_root_signature_deserializer_AddRef(ID3D12VersionedRootSignatureDeserializer *iface) +{ + struct d3d12_versioned_root_signature_deserializer *deserializer = impl_from_ID3D12VersionedRootSignatureDeserializer(iface); + ULONG refcount = InterlockedIncrement(&deserializer->refcount); + + TRACE("%p increasing refcount to %u.\n", deserializer, refcount); + + return refcount; +} + +static ULONG STDMETHODCALLTYPE d3d12_versioned_root_signature_deserializer_Release(ID3D12VersionedRootSignatureDeserializer *iface) +{ + struct d3d12_versioned_root_signature_deserializer *deserializer = impl_from_ID3D12VersionedRootSignatureDeserializer(iface); + ULONG refcount = InterlockedDecrement(&deserializer->refcount); + + TRACE("%p decreasing refcount to %u.\n", deserializer, refcount); + + if (!refcount) + { + vkd3d_shader_free_root_signature(&deserializer->desc.vkd3d); + vkd3d_shader_free_root_signature(&deserializer->other_desc.vkd3d); + vkd3d_free(deserializer); + } + + return refcount; +} + +static enum vkd3d_shader_root_signature_version vkd3d_root_signature_version_from_d3d12( + D3D_ROOT_SIGNATURE_VERSION version) +{ + switch (version) + { + case D3D_ROOT_SIGNATURE_VERSION_1_0: + return VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0; + case D3D_ROOT_SIGNATURE_VERSION_1_1: + return VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1; + default: + WARN("Unknown root signature version %#x.\n", version); + return 0; + } +} + +static HRESULT STDMETHODCALLTYPE d3d12_versioned_root_signature_deserializer_GetRootSignatureDescAtVersion( + ID3D12VersionedRootSignatureDeserializer *iface, D3D_ROOT_SIGNATURE_VERSION version, + const 
D3D12_VERSIONED_ROOT_SIGNATURE_DESC **desc) +{ + struct d3d12_versioned_root_signature_deserializer *deserializer = impl_from_ID3D12VersionedRootSignatureDeserializer(iface); + int ret; + + TRACE("iface %p, version %#x, desc %p.\n", iface, version, desc); + + if (version != D3D_ROOT_SIGNATURE_VERSION_1_0 && version != D3D_ROOT_SIGNATURE_VERSION_1_1) + { + WARN("Root signature version %#x not supported.\n", version); + return E_INVALIDARG; + } + + if (deserializer->desc.d3d12.Version == version) + { + *desc = &deserializer->desc.d3d12; + return S_OK; + } + + if (!deserializer->other_desc.d3d12.Version) + { + if ((ret = vkd3d_shader_convert_root_signature(&deserializer->other_desc.vkd3d, + vkd3d_root_signature_version_from_d3d12(version), &deserializer->desc.vkd3d)) < 0) + { + WARN("Failed to convert versioned root signature, vkd3d result %d.\n", ret); + return hresult_from_vkd3d_result(ret); + } + } + + assert(deserializer->other_desc.d3d12.Version == version); + *desc = &deserializer->other_desc.d3d12; + return S_OK; +} + +static const D3D12_VERSIONED_ROOT_SIGNATURE_DESC * STDMETHODCALLTYPE +d3d12_versioned_root_signature_deserializer_GetUnconvertedRootSignatureDesc(ID3D12VersionedRootSignatureDeserializer *iface) +{ + struct d3d12_versioned_root_signature_deserializer *deserializer = impl_from_ID3D12VersionedRootSignatureDeserializer(iface); + + TRACE("iface %p.\n", iface); + + return &deserializer->desc.d3d12; +} + +static const struct ID3D12VersionedRootSignatureDeserializerVtbl d3d12_versioned_root_signature_deserializer_vtbl = +{ + /* IUnknown methods */ + d3d12_versioned_root_signature_deserializer_QueryInterface, + d3d12_versioned_root_signature_deserializer_AddRef, + d3d12_versioned_root_signature_deserializer_Release, + /* ID3D12VersionedRootSignatureDeserializer methods */ + d3d12_versioned_root_signature_deserializer_GetRootSignatureDescAtVersion, + d3d12_versioned_root_signature_deserializer_GetUnconvertedRootSignatureDesc, +}; + +static HRESULT 
d3d12_versioned_root_signature_deserializer_init(struct d3d12_versioned_root_signature_deserializer *deserializer, + const struct vkd3d_shader_code *dxbc) +{ + int ret; + + deserializer->ID3D12VersionedRootSignatureDeserializer_iface.lpVtbl = &d3d12_versioned_root_signature_deserializer_vtbl; + deserializer->refcount = 1; + + if ((ret = vkd3d_shader_parse_root_signature(dxbc, &deserializer->desc.vkd3d, NULL)) < 0) + { + WARN("Failed to parse root signature, vkd3d result %d.\n", ret); + return hresult_from_vkd3d_result(ret); + } + + memset(&deserializer->other_desc, 0, sizeof(deserializer->other_desc)); + + return S_OK; +} + +HRESULT vkd3d_create_versioned_root_signature_deserializer(const void *data, SIZE_T data_size, + REFIID iid, void **deserializer) +{ + struct d3d12_versioned_root_signature_deserializer *object; + struct vkd3d_shader_code dxbc = {data, data_size}; + HRESULT hr; + + TRACE("data %p, data_size %lu, iid %s, deserializer %p.\n", + data, data_size, debugstr_guid(iid), deserializer); + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_versioned_root_signature_deserializer_init(object, &dxbc))) + { + vkd3d_free(object); + return hr; + } + + return return_interface(&object->ID3D12VersionedRootSignatureDeserializer_iface, + &IID_ID3D12VersionedRootSignatureDeserializer, iid, deserializer); +} + +HRESULT vkd3d_serialize_root_signature(const D3D12_ROOT_SIGNATURE_DESC *desc, + D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob) +{ + struct vkd3d_shader_versioned_root_signature_desc vkd3d_desc; + struct vkd3d_shader_code dxbc; + char *messages; + HRESULT hr; + int ret; + + TRACE("desc %p, version %#x, blob %p, error_blob %p.\n", desc, version, blob, error_blob); + + if (version != D3D_ROOT_SIGNATURE_VERSION_1_0) + { + WARN("Unexpected Root signature version %#x.\n", version); + return E_INVALIDARG; + } + + if (!blob) + { + WARN("Invalid blob parameter.\n"); + return E_INVALIDARG; + } + 
+ if (error_blob) + *error_blob = NULL; + + vkd3d_desc.version = VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0; + vkd3d_desc.u.v_1_0 = *(const struct vkd3d_shader_root_signature_desc *)desc; + if ((ret = vkd3d_shader_serialize_root_signature(&vkd3d_desc, &dxbc, &messages)) < 0) + { + WARN("Failed to serialize root signature, vkd3d result %d.\n", ret); + if (error_blob && messages) + { + if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) + ERR("Failed to create error blob, hr %#x.\n", hr); + } + return hresult_from_vkd3d_result(ret); + } + vkd3d_shader_free_messages(messages); + + if (FAILED(hr = vkd3d_blob_create((void *)dxbc.code, dxbc.size, blob))) + { + WARN("Failed to create blob object, hr %#x.\n", hr); + vkd3d_shader_free_shader_code(&dxbc); + } + return hr; +} + +HRESULT vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc, + ID3DBlob **blob, ID3DBlob **error_blob) +{ + const struct vkd3d_shader_versioned_root_signature_desc *vkd3d_desc; + struct vkd3d_shader_code dxbc; + char *messages; + HRESULT hr; + int ret; + + TRACE("desc %p, blob %p, error_blob %p.\n", desc, blob, error_blob); + + if (!blob) + { + WARN("Invalid blob parameter.\n"); + return E_INVALIDARG; + } + + if (error_blob) + *error_blob = NULL; + + vkd3d_desc = (const struct vkd3d_shader_versioned_root_signature_desc *)desc; + if ((ret = vkd3d_shader_serialize_root_signature(vkd3d_desc, &dxbc, &messages)) < 0) + { + WARN("Failed to serialize root signature, vkd3d result %d.\n", ret); + if (error_blob && messages) + { + if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) + ERR("Failed to create error blob, hr %#x.\n", hr); + } + return hresult_from_vkd3d_result(ret); + } + vkd3d_shader_free_messages(messages); + + if (FAILED(hr = vkd3d_blob_create((void *)dxbc.code, dxbc.size, blob))) + { + WARN("Failed to create blob object, hr %#x.\n", hr); + vkd3d_shader_free_shader_code(&dxbc); + } + return hr; +} diff --git 
a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h new file mode 100644 index 00000000000..ac93245fe49 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -0,0 +1,1582 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_PRIVATE_H +#define __VKD3D_PRIVATE_H + +#define COBJMACROS +#define NONAMELESSUNION +#define VK_NO_PROTOTYPES + +#ifdef _WIN32 +# define _WIN32_WINNT 0x0600 /* for condition variables */ +#endif + +#include "vkd3d_common.h" +#include "vkd3d_blob.h" +#include "vkd3d_memory.h" +#include "vkd3d_utf8.h" +#include "wine/list.h" +#include "wine/rbtree.h" + +#include "vkd3d.h" +#include "vkd3d_shader.h" + +#include +#include +#include +#include + +#define VK_CALL(f) (vk_procs->f) + +#define VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW 0x01000000u + +#define VKD3D_DESCRIPTOR_MAGIC_FREE 0x00000000u +#define VKD3D_DESCRIPTOR_MAGIC_CBV VKD3D_MAKE_TAG('C', 'B', 'V', 0) +#define VKD3D_DESCRIPTOR_MAGIC_SRV VKD3D_MAKE_TAG('S', 'R', 'V', 1) +#define VKD3D_DESCRIPTOR_MAGIC_UAV VKD3D_MAKE_TAG('U', 'A', 'V', 1) +#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 1) +#define VKD3D_DESCRIPTOR_MAGIC_DSV VKD3D_MAKE_TAG('D', 'S', 'V', 0) +#define 
VKD3D_DESCRIPTOR_MAGIC_RTV VKD3D_MAKE_TAG('R', 'T', 'V', 0) + +#define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u +#define VKD3D_MAX_QUEUE_FAMILY_COUNT 3u +#define VKD3D_MAX_SHADER_EXTENSIONS 3u +#define VKD3D_MAX_SHADER_STAGES 5u +#define VKD3D_MAX_VK_SYNC_OBJECTS 4u +#define VKD3D_MAX_DESCRIPTOR_SETS 64u +/* D3D12 binding tier 3 has a limit of 2048 samplers. */ +#define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u +/* The main limitation here is the simple descriptor pool recycling scheme + * requiring each pool to contain all descriptor types used by vkd3d. Limit + * this number to prevent excessive pool memory use. */ +#define VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE (16 * 1024u) + +struct d3d12_command_list; +struct d3d12_device; +struct d3d12_resource; + +struct vkd3d_vk_global_procs +{ + PFN_vkCreateInstance vkCreateInstance; + PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; +}; + +#define DECLARE_VK_PFN(name) PFN_##name name; +struct vkd3d_vk_instance_procs +{ +#define VK_INSTANCE_PFN DECLARE_VK_PFN +#define VK_INSTANCE_EXT_PFN DECLARE_VK_PFN +#include "vulkan_procs.h" +}; + +struct vkd3d_vk_device_procs +{ +#define VK_INSTANCE_PFN DECLARE_VK_PFN +#define VK_DEVICE_PFN DECLARE_VK_PFN +#define VK_DEVICE_EXT_PFN DECLARE_VK_PFN +#include "vulkan_procs.h" +}; +#undef DECLARE_VK_PFN + +HRESULT hresult_from_errno(int rc); +HRESULT hresult_from_vk_result(VkResult vr); +HRESULT hresult_from_vkd3d_result(int vkd3d_result); + +struct vkd3d_device_descriptor_limits +{ + unsigned int uniform_buffer_max_descriptors; + unsigned int sampled_image_max_descriptors; + unsigned int storage_buffer_max_descriptors; + unsigned int storage_image_max_descriptors; + unsigned int sampler_max_descriptors; +}; + +struct vkd3d_vulkan_info +{ + /* KHR instance extensions */ + bool KHR_get_physical_device_properties2; + /* EXT instance extensions */ + bool EXT_debug_report; + + /* KHR device extensions */ + 
bool KHR_dedicated_allocation; + bool KHR_draw_indirect_count; + bool KHR_get_memory_requirements2; + bool KHR_image_format_list; + bool KHR_maintenance3; + bool KHR_push_descriptor; + bool KHR_sampler_mirror_clamp_to_edge; + /* EXT device extensions */ + bool EXT_calibrated_timestamps; + bool EXT_conditional_rendering; + bool EXT_debug_marker; + bool EXT_depth_clip_enable; + bool EXT_descriptor_indexing; + bool EXT_shader_demote_to_helper_invocation; + bool EXT_shader_stencil_export; + bool EXT_texel_buffer_alignment; + bool EXT_transform_feedback; + bool EXT_vertex_attribute_divisor; + + bool rasterization_stream; + bool transform_feedback_queries; + + bool vertex_attrib_zero_divisor; + unsigned int max_vertex_attrib_divisor; + + VkPhysicalDeviceLimits device_limits; + VkPhysicalDeviceSparseProperties sparse_properties; + struct vkd3d_device_descriptor_limits descriptor_limits; + + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; + + unsigned int shader_extension_count; + enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS]; + + D3D_FEATURE_LEVEL max_feature_level; +}; + +enum vkd3d_config_flags +{ + VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001, +}; + +struct vkd3d_instance +{ + VkInstance vk_instance; + struct vkd3d_vk_instance_procs vk_procs; + + PFN_vkd3d_signal_event signal_event; + PFN_vkd3d_create_thread create_thread; + PFN_vkd3d_join_thread join_thread; + size_t wchar_size; + + struct vkd3d_vulkan_info vk_info; + struct vkd3d_vk_global_procs vk_global_procs; + void *libvulkan; + + uint64_t config_flags; + enum vkd3d_api_version api_version; + + VkDebugReportCallbackEXT vk_debug_callback; + + uint64_t host_ticks_per_second; + + LONG refcount; +}; + +#ifdef _WIN32 + +union vkd3d_thread_handle +{ + void *handle; +}; + +struct vkd3d_mutex +{ + CRITICAL_SECTION lock; +}; + +struct vkd3d_cond +{ + CONDITION_VARIABLE cond; +}; + +static inline int vkd3d_mutex_init(struct vkd3d_mutex *lock) +{ + 
InitializeCriticalSection(&lock->lock); + return 0; +} + +static inline int vkd3d_mutex_lock(struct vkd3d_mutex *lock) +{ + EnterCriticalSection(&lock->lock); + return 0; +} + +static inline int vkd3d_mutex_unlock(struct vkd3d_mutex *lock) +{ + LeaveCriticalSection(&lock->lock); + return 0; +} + +static inline int vkd3d_mutex_destroy(struct vkd3d_mutex *lock) +{ + DeleteCriticalSection(&lock->lock); + return 0; +} + +static inline int vkd3d_cond_init(struct vkd3d_cond *cond) +{ + InitializeConditionVariable(&cond->cond); + return 0; +} + +static inline int vkd3d_cond_signal(struct vkd3d_cond *cond) +{ + WakeConditionVariable(&cond->cond); + return 0; +} + +static inline int vkd3d_cond_broadcast(struct vkd3d_cond *cond) +{ + WakeAllConditionVariable(&cond->cond); + return 0; +} + +static inline int vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) +{ + return !SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE); +} + +static inline int vkd3d_cond_destroy(struct vkd3d_cond *cond) +{ + return 0; +} + +#else /* _WIN32 */ + +#include + +union vkd3d_thread_handle +{ + pthread_t pthread; + void *handle; +}; + +struct vkd3d_mutex +{ + pthread_mutex_t lock; +}; + +struct vkd3d_cond +{ + pthread_cond_t cond; +}; + + +static inline int vkd3d_mutex_init(struct vkd3d_mutex *lock) +{ + return pthread_mutex_init(&lock->lock, NULL); +} + +static inline int vkd3d_mutex_lock(struct vkd3d_mutex *lock) +{ + return pthread_mutex_lock(&lock->lock); +} + +static inline int vkd3d_mutex_unlock(struct vkd3d_mutex *lock) +{ + return pthread_mutex_unlock(&lock->lock); +} + +static inline int vkd3d_mutex_destroy(struct vkd3d_mutex *lock) +{ + return pthread_mutex_destroy(&lock->lock); +} + +static inline int vkd3d_cond_init(struct vkd3d_cond *cond) +{ + return pthread_cond_init(&cond->cond, NULL); +} + +static inline int vkd3d_cond_signal(struct vkd3d_cond *cond) +{ + return pthread_cond_signal(&cond->cond); +} + +static inline int vkd3d_cond_broadcast(struct 
vkd3d_cond *cond) +{ + return pthread_cond_broadcast(&cond->cond); +} + +static inline int vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) +{ + return pthread_cond_wait(&cond->cond, &lock->lock); +} + +static inline int vkd3d_cond_destroy(struct vkd3d_cond *cond) +{ + return pthread_cond_destroy(&cond->cond); +} + +#endif /* _WIN32 */ + +HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, + PFN_vkd3d_thread thread_main, void *data, union vkd3d_thread_handle *thread); +HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_handle *thread); + +struct vkd3d_waiting_fence +{ + struct d3d12_fence *fence; + uint64_t value; + struct vkd3d_queue *queue; + uint64_t queue_sequence_number; +}; + +struct vkd3d_fence_worker +{ + union vkd3d_thread_handle thread; + struct vkd3d_mutex mutex; + struct vkd3d_cond cond; + struct vkd3d_cond fence_destruction_cond; + bool should_exit; + bool pending_fence_destruction; + + LONG enqueued_fence_count; + struct vkd3d_enqueued_fence + { + VkFence vk_fence; + struct vkd3d_waiting_fence waiting_fence; + } *enqueued_fences; + size_t enqueued_fences_size; + + size_t fence_count; + VkFence *vk_fences; + size_t vk_fences_size; + struct vkd3d_waiting_fence *fences; + size_t fences_size; + + struct d3d12_device *device; +}; + +HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, struct d3d12_device *device); +HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, struct d3d12_device *device); + +struct vkd3d_gpu_va_allocation +{ + D3D12_GPU_VIRTUAL_ADDRESS base; + size_t size; + void *ptr; +}; + +struct vkd3d_gpu_va_slab +{ + size_t size; + void *ptr; +}; + +struct vkd3d_gpu_va_allocator +{ + struct vkd3d_mutex mutex; + + D3D12_GPU_VIRTUAL_ADDRESS fallback_floor; + struct vkd3d_gpu_va_allocation *fallback_allocations; + size_t fallback_allocations_size; + size_t fallback_allocation_count; + + struct vkd3d_gpu_va_slab *slabs; + struct vkd3d_gpu_va_slab *free_slab; +}; + 
+D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator, + size_t alignment, size_t size, void *ptr); +void *vkd3d_gpu_va_allocator_dereference(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address); +void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address); + +struct vkd3d_gpu_descriptor_allocation +{ + const struct d3d12_desc *base; + size_t count; +}; + +struct vkd3d_gpu_descriptor_allocator +{ + struct vkd3d_mutex mutex; + + struct vkd3d_gpu_descriptor_allocation *allocations; + size_t allocations_size; + size_t allocation_count; +}; + +size_t vkd3d_gpu_descriptor_allocator_range_size_from_descriptor( + struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc); +bool vkd3d_gpu_descriptor_allocator_register_range(struct vkd3d_gpu_descriptor_allocator *allocator, + const struct d3d12_desc *base, size_t count); +bool vkd3d_gpu_descriptor_allocator_unregister_range( + struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *base); +struct d3d12_descriptor_heap *vkd3d_gpu_descriptor_allocator_heap_from_descriptor( + struct vkd3d_gpu_descriptor_allocator *allocator, const struct d3d12_desc *desc); + +struct vkd3d_render_pass_key +{ + unsigned int attachment_count; + bool depth_enable; + bool stencil_enable; + bool depth_stencil_write; + bool padding; + unsigned int sample_count; + VkFormat vk_formats[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT + 1]; +}; + +struct vkd3d_render_pass_entry; + +struct vkd3d_render_pass_cache +{ + struct vkd3d_render_pass_entry *render_passes; + size_t render_pass_count; + size_t render_passes_size; +}; + +void vkd3d_render_pass_cache_cleanup(struct vkd3d_render_pass_cache *cache, struct d3d12_device *device); +HRESULT vkd3d_render_pass_cache_find(struct vkd3d_render_pass_cache *cache, struct d3d12_device *device, + const struct vkd3d_render_pass_key *key, VkRenderPass *vk_render_pass); 
+void vkd3d_render_pass_cache_init(struct vkd3d_render_pass_cache *cache); + +struct vkd3d_private_store +{ + struct vkd3d_mutex mutex; + + struct list content; +}; + +struct vkd3d_private_data +{ + struct list entry; + + GUID tag; + unsigned int size; + bool is_object; + union + { + BYTE data[1]; + IUnknown *object; + } u; +}; + +static inline void vkd3d_private_data_destroy(struct vkd3d_private_data *data) +{ + if (data->is_object) + IUnknown_Release(data->u.object); + list_remove(&data->entry); + vkd3d_free(data); +} + +static inline HRESULT vkd3d_private_store_init(struct vkd3d_private_store *store) +{ + int rc; + + list_init(&store->content); + + if ((rc = vkd3d_mutex_init(&store->mutex))) + ERR("Failed to initialize mutex, error %d.\n", rc); + + return hresult_from_errno(rc); +} + +static inline void vkd3d_private_store_destroy(struct vkd3d_private_store *store) +{ + struct vkd3d_private_data *data, *cursor; + + LIST_FOR_EACH_ENTRY_SAFE(data, cursor, &store->content, struct vkd3d_private_data, entry) + { + vkd3d_private_data_destroy(data); + } + + vkd3d_mutex_destroy(&store->mutex); +} + +HRESULT vkd3d_get_private_data(struct vkd3d_private_store *store, const GUID *tag, unsigned int *out_size, void *out); +HRESULT vkd3d_set_private_data(struct vkd3d_private_store *store, + const GUID *tag, unsigned int data_size, const void *data); +HRESULT vkd3d_set_private_data_interface(struct vkd3d_private_store *store, const GUID *tag, const IUnknown *object); + +struct vkd3d_signaled_semaphore +{ + struct list entry; + uint64_t value; + VkSemaphore vk_semaphore; + VkFence vk_fence; + bool is_acquired; +}; + +/* ID3D12Fence */ +struct d3d12_fence +{ + ID3D12Fence ID3D12Fence_iface; + LONG refcount; + + uint64_t value; + struct vkd3d_mutex mutex; + struct vkd3d_cond null_event_cond; + + struct vkd3d_waiting_event + { + uint64_t value; + HANDLE event; + bool latch; + } *events; + size_t events_size; + size_t event_count; + + struct list semaphores; + unsigned int 
semaphore_count; + + LONG pending_worker_operation_count; + + VkFence old_vk_fences[VKD3D_MAX_VK_SYNC_OBJECTS]; + + struct d3d12_device *device; + + struct vkd3d_private_store private_store; +}; + +HRESULT d3d12_fence_create(struct d3d12_device *device, uint64_t initial_value, + D3D12_FENCE_FLAGS flags, struct d3d12_fence **fence); + +/* ID3D12Heap */ +struct d3d12_heap +{ + ID3D12Heap ID3D12Heap_iface; + LONG refcount; + + bool is_private; + D3D12_HEAP_DESC desc; + + struct vkd3d_mutex mutex; + + VkDeviceMemory vk_memory; + void *map_ptr; + unsigned int map_count; + uint32_t vk_memory_type; + + struct d3d12_device *device; + + struct vkd3d_private_store private_store; +}; + +HRESULT d3d12_heap_create(struct d3d12_device *device, const D3D12_HEAP_DESC *desc, + const struct d3d12_resource *resource, struct d3d12_heap **heap); +struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); + +#define VKD3D_RESOURCE_PUBLIC_FLAGS \ + (VKD3D_RESOURCE_INITIAL_STATE_TRANSITION | VKD3D_RESOURCE_PRESENT_STATE_TRANSITION) +#define VKD3D_RESOURCE_EXTERNAL 0x00000004 +#define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 +#define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 + +/* ID3D12Resource */ +struct d3d12_resource +{ + ID3D12Resource ID3D12Resource_iface; + LONG refcount; + LONG internal_refcount; + + D3D12_RESOURCE_DESC desc; + const struct vkd3d_format *format; + + D3D12_GPU_VIRTUAL_ADDRESS gpu_address; + union + { + VkBuffer vk_buffer; + VkImage vk_image; + } u; + unsigned int flags; + + unsigned int map_count; + + struct d3d12_heap *heap; + uint64_t heap_offset; + + D3D12_RESOURCE_STATES initial_state; + D3D12_RESOURCE_STATES present_state; + + struct d3d12_device *device; + + struct vkd3d_private_store private_store; +}; + +static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) +{ + return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; +} + +static inline bool d3d12_resource_is_texture(const struct d3d12_resource *resource) +{ + 
return resource->desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER; +} + +bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); +HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d12_device *device); + +HRESULT d3d12_committed_resource_create(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource); +HRESULT d3d12_placed_resource_create(struct d3d12_device *device, struct d3d12_heap *heap, uint64_t heap_offset, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource); +HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource); +struct d3d12_resource *unsafe_impl_from_ID3D12Resource(ID3D12Resource *iface); + +HRESULT vkd3d_allocate_buffer_memory(struct d3d12_device *device, VkBuffer vk_buffer, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + VkDeviceMemory *vk_memory, uint32_t *vk_memory_type, VkDeviceSize *vk_memory_size); +HRESULT vkd3d_create_buffer(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, VkBuffer *vk_buffer); +HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_ALLOCATION_INFO *allocation_info); + +enum vkd3d_view_type +{ + VKD3D_VIEW_TYPE_BUFFER, + VKD3D_VIEW_TYPE_IMAGE, + VKD3D_VIEW_TYPE_SAMPLER, +}; + +struct vkd3d_view +{ + LONG refcount; + enum vkd3d_view_type type; + union + { + VkBufferView vk_buffer_view; + VkImageView vk_image_view; + VkSampler 
vk_sampler; + } u; + VkBufferView vk_counter_view; + const struct vkd3d_format *format; + union + { + struct + { + VkDeviceSize offset; + VkDeviceSize size; + } buffer; + struct + { + VkImageViewType vk_view_type; + unsigned int miplevel_idx; + unsigned int layer_idx; + unsigned int layer_count; + } texture; + } info; +}; + +void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device); +void vkd3d_view_incref(struct vkd3d_view *view); + +struct vkd3d_texture_view_desc +{ + VkImageViewType view_type; + const struct vkd3d_format *format; + unsigned int miplevel_idx; + unsigned int miplevel_count; + unsigned int layer_idx; + unsigned int layer_count; + VkImageAspectFlags vk_image_aspect; + VkComponentMapping components; + bool allowed_swizzle; +}; + +bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, + VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view); +bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, + const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view); + +struct d3d12_desc +{ + uint32_t magic; + VkDescriptorType vk_descriptor_type; + union + { + VkDescriptorBufferInfo vk_cbv_info; + struct vkd3d_view *view; + } u; +}; + +static inline struct d3d12_desc *d3d12_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle) +{ + return (struct d3d12_desc *)cpu_handle.ptr; +} + +static inline struct d3d12_desc *d3d12_desc_from_gpu_handle(D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle) +{ + return (struct d3d12_desc *)(intptr_t)gpu_handle.ptr; +} + +void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, + struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); +void d3d12_desc_create_srv(struct d3d12_desc *descriptor, + struct d3d12_device *device, struct d3d12_resource *resource, + const 
D3D12_SHADER_RESOURCE_VIEW_DESC *desc); +void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, + struct d3d12_resource *resource, struct d3d12_resource *counter_resource, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc); +void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device *device, const D3D12_SAMPLER_DESC *desc); +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); + +bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, + D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view); +HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, + const D3D12_STATIC_SAMPLER_DESC *desc, VkSampler *vk_sampler); + +struct d3d12_rtv_desc +{ + uint32_t magic; + VkSampleCountFlagBits sample_count; + const struct vkd3d_format *format; + uint64_t width; + unsigned int height; + unsigned int layer_count; + struct vkd3d_view *view; + struct d3d12_resource *resource; +}; + +static inline struct d3d12_rtv_desc *d3d12_rtv_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle) +{ + return (struct d3d12_rtv_desc *)cpu_handle.ptr; +} + +void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_device *device, + struct d3d12_resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc); + +struct d3d12_dsv_desc +{ + uint32_t magic; + VkSampleCountFlagBits sample_count; + const struct vkd3d_format *format; + uint64_t width; + unsigned int height; + unsigned int layer_count; + struct vkd3d_view *view; + struct d3d12_resource *resource; +}; + +static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle) +{ + return (struct d3d12_dsv_desc *)cpu_handle.ptr; +} + +void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, + struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc); + +/* ID3D12DescriptorHeap */ +struct 
d3d12_descriptor_heap +{ + ID3D12DescriptorHeap ID3D12DescriptorHeap_iface; + LONG refcount; + + D3D12_DESCRIPTOR_HEAP_DESC desc; + + struct d3d12_device *device; + + struct vkd3d_private_store private_store; + + BYTE descriptors[]; +}; + +HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, + const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap); + +/* ID3D12QueryHeap */ +struct d3d12_query_heap +{ + ID3D12QueryHeap ID3D12QueryHeap_iface; + LONG refcount; + + VkQueryPool vk_query_pool; + + struct d3d12_device *device; + + struct vkd3d_private_store private_store; + + uint64_t availability_mask[]; +}; + +HRESULT d3d12_query_heap_create(struct d3d12_device *device, + const D3D12_QUERY_HEAP_DESC *desc, struct d3d12_query_heap **heap); +struct d3d12_query_heap *unsafe_impl_from_ID3D12QueryHeap(ID3D12QueryHeap *iface); + +/* A Vulkan query has to be issued at least one time before the result is + * available. In D3D12 it is legal to get query reults for not issued queries. 
+ */ +static inline bool d3d12_query_heap_is_result_available(const struct d3d12_query_heap *heap, + unsigned int query_index) +{ + unsigned int index = query_index / (sizeof(*heap->availability_mask) * CHAR_BIT); + unsigned int shift = query_index % (sizeof(*heap->availability_mask) * CHAR_BIT); + return heap->availability_mask[index] & ((uint64_t)1 << shift); +} + +static inline void d3d12_query_heap_mark_result_as_available(struct d3d12_query_heap *heap, + unsigned int query_index) +{ + unsigned int index = query_index / (sizeof(*heap->availability_mask) * CHAR_BIT); + unsigned int shift = query_index % (sizeof(*heap->availability_mask) * CHAR_BIT); + heap->availability_mask[index] |= (uint64_t)1 << shift; +} + +struct d3d12_root_descriptor_table_range +{ + unsigned int offset; + unsigned int descriptor_count; + unsigned int vk_binding_count; + uint32_t set; + uint32_t binding; + + enum vkd3d_shader_descriptor_type type; + uint32_t descriptor_magic; + unsigned int register_space; + unsigned int base_register_idx; +}; + +struct d3d12_root_descriptor_table +{ + unsigned int range_count; + struct d3d12_root_descriptor_table_range *ranges; +}; + +struct d3d12_root_constant +{ + VkShaderStageFlags stage_flags; + uint32_t offset; +}; + +struct d3d12_root_descriptor +{ + uint32_t binding; +}; + +struct d3d12_root_parameter +{ + D3D12_ROOT_PARAMETER_TYPE parameter_type; + union + { + struct d3d12_root_constant constant; + struct d3d12_root_descriptor descriptor; + struct d3d12_root_descriptor_table descriptor_table; + } u; +}; + +struct d3d12_descriptor_set_layout +{ + VkDescriptorSetLayout vk_layout; + unsigned int unbounded_offset; + unsigned int table_index; +}; + +/* ID3D12RootSignature */ +struct d3d12_root_signature +{ + ID3D12RootSignature ID3D12RootSignature_iface; + LONG refcount; + + VkPipelineLayout vk_pipeline_layout; + struct d3d12_descriptor_set_layout descriptor_set_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; + uint32_t vk_set_count; + bool use_descriptor_arrays; 
+ + struct d3d12_root_parameter *parameters; + unsigned int parameter_count; + uint32_t main_set; + + uint64_t descriptor_table_mask; + uint32_t push_descriptor_mask; + + D3D12_ROOT_SIGNATURE_FLAGS flags; + + unsigned int binding_count; + struct vkd3d_shader_resource_binding *descriptor_mapping; + struct vkd3d_shader_descriptor_offset *descriptor_offsets; + + unsigned int root_constant_count; + struct vkd3d_shader_push_constant_buffer *root_constants; + + unsigned int root_descriptor_count; + + unsigned int push_constant_range_count; + /* Only a single push constant range may include the same stage in Vulkan. */ + VkPushConstantRange push_constant_ranges[D3D12_SHADER_VISIBILITY_PIXEL + 1]; + + unsigned int static_sampler_count; + VkSampler *static_samplers; + + struct d3d12_device *device; + + struct vkd3d_private_store private_store; +}; + +HRESULT d3d12_root_signature_create(struct d3d12_device *device, const void *bytecode, + size_t bytecode_length, struct d3d12_root_signature **root_signature); +struct d3d12_root_signature *unsafe_impl_from_ID3D12RootSignature(ID3D12RootSignature *iface); + +int vkd3d_parse_root_signature_v_1_0(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_versioned_root_signature_desc *desc); + +struct d3d12_graphics_pipeline_state +{ + VkPipelineShaderStageCreateInfo stages[VKD3D_MAX_SHADER_STAGES]; + size_t stage_count; + + VkVertexInputAttributeDescription attributes[D3D12_VS_INPUT_REGISTER_COUNT]; + VkVertexInputRate input_rates[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + VkVertexInputBindingDivisorDescriptionEXT instance_divisors[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + size_t instance_divisor_count; + size_t attribute_count; + + bool om_logic_op_enable; + VkLogicOp om_logic_op; + VkPipelineColorBlendAttachmentState blend_attachments[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; + unsigned int rt_count; + unsigned int null_attachment_mask; + VkFormat dsv_format; + VkFormat 
rtv_formats[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; + VkRenderPass render_pass; + + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE index_buffer_strip_cut_value; + VkPipelineRasterizationStateCreateInfo rs_desc; + VkPipelineMultisampleStateCreateInfo ms_desc; + VkPipelineDepthStencilStateCreateInfo ds_desc; + + VkSampleMask sample_mask[2]; + VkPipelineRasterizationDepthClipStateCreateInfoEXT rs_depth_clip_info; + VkPipelineRasterizationStateStreamCreateInfoEXT rs_stream_info; + + const struct d3d12_root_signature *root_signature; + + struct list compiled_pipelines; + + bool xfb_enabled; +}; + +static inline unsigned int dsv_attachment_mask(const struct d3d12_graphics_pipeline_state *graphics) +{ + return 1u << graphics->rt_count; +} + +struct d3d12_compute_pipeline_state +{ + VkPipeline vk_pipeline; +}; + +struct d3d12_pipeline_uav_counter_state +{ + VkPipelineLayout vk_pipeline_layout; + VkDescriptorSetLayout vk_set_layout; + uint32_t set_index; + + struct vkd3d_shader_uav_counter_binding *bindings; + unsigned int binding_count; +}; + +/* ID3D12PipelineState */ +struct d3d12_pipeline_state +{ + ID3D12PipelineState ID3D12PipelineState_iface; + LONG refcount; + + union + { + struct d3d12_graphics_pipeline_state graphics; + struct d3d12_compute_pipeline_state compute; + } u; + VkPipelineBindPoint vk_bind_point; + + struct d3d12_pipeline_uav_counter_state uav_counters; + + struct d3d12_device *device; + + struct vkd3d_private_store private_store; +}; + +static inline bool d3d12_pipeline_state_is_compute(const struct d3d12_pipeline_state *state) +{ + return state && state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE; +} + +static inline bool d3d12_pipeline_state_is_graphics(const struct d3d12_pipeline_state *state) +{ + return state && state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS; +} + +static inline bool d3d12_pipeline_state_has_unknown_dsv_format(struct d3d12_pipeline_state *state) +{ + if (d3d12_pipeline_state_is_graphics(state)) + { + struct 
d3d12_graphics_pipeline_state *graphics = &state->u.graphics; + + return graphics->null_attachment_mask & dsv_attachment_mask(graphics); + } + + return false; +} + +HRESULT d3d12_pipeline_state_create_compute(struct d3d12_device *device, + const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, struct d3d12_pipeline_state **state); +HRESULT d3d12_pipeline_state_create_graphics(struct d3d12_device *device, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, struct d3d12_pipeline_state **state); +VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_state *state, + D3D12_PRIMITIVE_TOPOLOGY topology, const uint32_t *strides, VkFormat dsv_format, VkRenderPass *vk_render_pass); +struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12PipelineState *iface); + +struct vkd3d_buffer +{ + VkBuffer vk_buffer; + VkDeviceMemory vk_memory; +}; + +/* ID3D12CommandAllocator */ +struct d3d12_command_allocator +{ + ID3D12CommandAllocator ID3D12CommandAllocator_iface; + LONG refcount; + + D3D12_COMMAND_LIST_TYPE type; + VkQueueFlags vk_queue_flags; + + VkCommandPool vk_command_pool; + + VkDescriptorPool vk_descriptor_pool; + + VkDescriptorPool *free_descriptor_pools; + size_t free_descriptor_pools_size; + size_t free_descriptor_pool_count; + + VkRenderPass *passes; + size_t passes_size; + size_t pass_count; + + VkFramebuffer *framebuffers; + size_t framebuffers_size; + size_t framebuffer_count; + + VkDescriptorPool *descriptor_pools; + size_t descriptor_pools_size; + size_t descriptor_pool_count; + + struct vkd3d_view **views; + size_t views_size; + size_t view_count; + + VkBufferView *buffer_views; + size_t buffer_views_size; + size_t buffer_view_count; + + struct vkd3d_buffer *transfer_buffers; + size_t transfer_buffers_size; + size_t transfer_buffer_count; + + VkCommandBuffer *command_buffers; + size_t command_buffers_size; + size_t command_buffer_count; + + struct d3d12_command_list *current_command_list; + struct d3d12_device *device; + + struct 
vkd3d_private_store private_store; +}; + +HRESULT d3d12_command_allocator_create(struct d3d12_device *device, + D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_allocator **allocator); + +struct vkd3d_push_descriptor +{ + union + { + VkBufferView vk_buffer_view; + struct + { + VkBuffer vk_buffer; + VkDeviceSize offset; + } cbv; + } u; +}; + +struct vkd3d_pipeline_bindings +{ + const struct d3d12_root_signature *root_signature; + + VkPipelineBindPoint vk_bind_point; + /* All descriptor sets at index > 1 are for unbounded d3d12 ranges. Set + * 0 or 1 may be unbounded too. */ + size_t descriptor_set_count; + VkDescriptorSet descriptor_sets[VKD3D_MAX_DESCRIPTOR_SETS]; + bool in_use; + + struct d3d12_desc *descriptor_tables[D3D12_MAX_ROOT_COST]; + uint64_t descriptor_table_dirty_mask; + uint64_t descriptor_table_active_mask; + + VkBufferView *vk_uav_counter_views; + size_t vk_uav_counter_views_size; + bool uav_counters_dirty; + + /* Needed when VK_KHR_push_descriptor is not available. */ + struct vkd3d_push_descriptor push_descriptors[D3D12_MAX_ROOT_COST / 2]; + uint32_t push_descriptor_dirty_mask; + uint32_t push_descriptor_active_mask; +}; + +enum vkd3d_pipeline_bind_point +{ + VKD3D_PIPELINE_BIND_POINT_GRAPHICS = 0x0, + VKD3D_PIPELINE_BIND_POINT_COMPUTE = 0x1, + VKD3D_PIPELINE_BIND_POINT_COUNT = 0x2, +}; + +/* ID3D12CommandList */ +struct d3d12_command_list +{ + ID3D12GraphicsCommandList2 ID3D12GraphicsCommandList2_iface; + LONG refcount; + + D3D12_COMMAND_LIST_TYPE type; + VkQueueFlags vk_queue_flags; + + bool is_recording; + bool is_valid; + VkCommandBuffer vk_command_buffer; + + uint32_t strides[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + D3D12_PRIMITIVE_TOPOLOGY primitive_topology; + + DXGI_FORMAT index_buffer_format; + + VkImageView rtvs[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; + VkImageView dsv; + unsigned int fb_width; + unsigned int fb_height; + unsigned int fb_layer_count; + VkFormat dsv_format; + + bool xfb_enabled; + + bool is_predicated; + + 
VkFramebuffer current_framebuffer; + VkPipeline current_pipeline; + VkRenderPass pso_render_pass; + VkRenderPass current_render_pass; + struct vkd3d_pipeline_bindings pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COUNT]; + + struct d3d12_pipeline_state *state; + + struct d3d12_command_allocator *allocator; + struct d3d12_device *device; + + VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT]; + VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT]; + + struct vkd3d_private_store private_store; +}; + +HRESULT d3d12_command_list_create(struct d3d12_device *device, + UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *allocator_iface, + ID3D12PipelineState *initial_pipeline_state, struct d3d12_command_list **list); + +struct vkd3d_queue +{ + /* Access to VkQueue must be externally synchronized. */ + struct vkd3d_mutex mutex; + + VkQueue vk_queue; + + uint64_t completed_sequence_number; + uint64_t submitted_sequence_number; + + uint32_t vk_family_index; + VkQueueFlags vk_queue_flags; + uint32_t timestamp_bits; + + struct + { + VkSemaphore vk_semaphore; + uint64_t sequence_number; + } *semaphores; + size_t semaphores_size; + size_t semaphore_count; + + VkSemaphore old_vk_semaphores[VKD3D_MAX_VK_SYNC_OBJECTS]; +}; + +VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); +HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, + const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue); +void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); +void vkd3d_queue_release(struct vkd3d_queue *queue); + +/* ID3D12CommandQueue */ +struct d3d12_command_queue +{ + ID3D12CommandQueue ID3D12CommandQueue_iface; + LONG refcount; + + D3D12_COMMAND_QUEUE_DESC desc; + + struct vkd3d_queue *vkd3d_queue; + + const struct d3d12_fence *last_waited_fence; + uint64_t last_waited_fence_value; + + struct d3d12_device *device; + + struct vkd3d_private_store private_store; +}; + +HRESULT 
d3d12_command_queue_create(struct d3d12_device *device, + const D3D12_COMMAND_QUEUE_DESC *desc, struct d3d12_command_queue **queue); + +/* ID3D12CommandSignature */ +struct d3d12_command_signature +{ + ID3D12CommandSignature ID3D12CommandSignature_iface; + LONG refcount; + + D3D12_COMMAND_SIGNATURE_DESC desc; + + struct d3d12_device *device; + + struct vkd3d_private_store private_store; +}; + +HRESULT d3d12_command_signature_create(struct d3d12_device *device, + const D3D12_COMMAND_SIGNATURE_DESC *desc, struct d3d12_command_signature **signature); +struct d3d12_command_signature *unsafe_impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface); + +/* NULL resources */ +struct vkd3d_null_resources +{ + VkBuffer vk_buffer; + VkDeviceMemory vk_buffer_memory; + + VkBuffer vk_storage_buffer; + VkDeviceMemory vk_storage_buffer_memory; + + VkImage vk_2d_image; + VkDeviceMemory vk_2d_image_memory; + + VkImage vk_2d_storage_image; + VkDeviceMemory vk_2d_storage_image_memory; +}; + +HRESULT vkd3d_init_null_resources(struct vkd3d_null_resources *null_resources, struct d3d12_device *device); +void vkd3d_destroy_null_resources(struct vkd3d_null_resources *null_resources, struct d3d12_device *device); + +struct vkd3d_format_compatibility_list +{ + DXGI_FORMAT typeless_format; + unsigned int format_count; + VkFormat vk_formats[VKD3D_MAX_COMPATIBLE_FORMAT_COUNT]; +}; + +struct vkd3d_uav_clear_args +{ + VkClearColorValue colour; + VkOffset2D offset; + VkExtent2D extent; +}; + +struct vkd3d_uav_clear_pipelines +{ + VkPipeline buffer; + VkPipeline image_1d; + VkPipeline image_1d_array; + VkPipeline image_2d; + VkPipeline image_2d_array; + VkPipeline image_3d; +}; + +struct vkd3d_uav_clear_state +{ + VkDescriptorSetLayout vk_set_layout_buffer; + VkDescriptorSetLayout vk_set_layout_image; + + VkPipelineLayout vk_pipeline_layout_buffer; + VkPipelineLayout vk_pipeline_layout_image; + + struct vkd3d_uav_clear_pipelines pipelines_float; + struct vkd3d_uav_clear_pipelines 
pipelines_uint; +}; + +HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); +void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); + +#define VKD3D_DESCRIPTOR_POOL_COUNT 6 + +/* ID3D12Device */ +struct d3d12_device +{ + ID3D12Device ID3D12Device_iface; + LONG refcount; + + VkDevice vk_device; + VkPhysicalDevice vk_physical_device; + struct vkd3d_vk_device_procs vk_procs; + PFN_vkd3d_signal_event signal_event; + size_t wchar_size; + + struct vkd3d_gpu_descriptor_allocator gpu_descriptor_allocator; + struct vkd3d_gpu_va_allocator gpu_va_allocator; + struct vkd3d_fence_worker fence_worker; + + struct vkd3d_mutex mutex; + struct vkd3d_mutex desc_mutex[8]; + struct vkd3d_render_pass_cache render_pass_cache; + VkPipelineCache vk_pipeline_cache; + + VkPhysicalDeviceMemoryProperties memory_properties; + + D3D12_FEATURE_DATA_D3D12_OPTIONS feature_options; + D3D12_FEATURE_DATA_D3D12_OPTIONS1 feature_options1; + D3D12_FEATURE_DATA_D3D12_OPTIONS2 feature_options2; + D3D12_FEATURE_DATA_D3D12_OPTIONS3 feature_options3; + D3D12_FEATURE_DATA_D3D12_OPTIONS4 feature_options4; + D3D12_FEATURE_DATA_D3D12_OPTIONS5 feature_options5; + + struct vkd3d_vulkan_info vk_info; + + struct vkd3d_queue *direct_queue; + struct vkd3d_queue *compute_queue; + struct vkd3d_queue *copy_queue; + uint32_t queue_family_indices[VKD3D_MAX_QUEUE_FAMILY_COUNT]; + unsigned int queue_family_count; + VkTimeDomainEXT vk_host_time_domain; + + struct vkd3d_instance *vkd3d_instance; + + IUnknown *parent; + LUID adapter_luid; + + struct vkd3d_private_store private_store; + + HRESULT removed_reason; + + const struct vkd3d_format *depth_stencil_formats; + unsigned int format_compatibility_list_count; + const struct vkd3d_format_compatibility_list *format_compatibility_lists; + struct vkd3d_null_resources null_resources; + struct vkd3d_uav_clear_state uav_clear_state; + + VkDescriptorPoolSize 
vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; +}; + +HRESULT d3d12_device_create(struct vkd3d_instance *instance, + const struct vkd3d_device_create_info *create_info, struct d3d12_device **device); +struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type); +bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); +void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, + const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); +struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface); + +static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) +{ + return ID3D12Device_QueryInterface(&device->ID3D12Device_iface, iid, object); +} + +static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) +{ + return ID3D12Device_AddRef(&device->ID3D12Device_iface); +} + +static inline ULONG d3d12_device_release(struct d3d12_device *device) +{ + return ID3D12Device_Release(&device->ID3D12Device_iface); +} + +static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) +{ + return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); +} + +static inline struct vkd3d_mutex *d3d12_device_get_descriptor_mutex(struct d3d12_device *device, + const struct d3d12_desc *descriptor) +{ + STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1))); + uintptr_t idx = (uintptr_t)descriptor; + + idx ^= idx >> 12; + idx ^= idx >> 6; + idx ^= idx >> 3; + + return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)]; +} + +/* utils */ +enum vkd3d_format_type +{ + VKD3D_FORMAT_TYPE_OTHER, + VKD3D_FORMAT_TYPE_TYPELESS, + VKD3D_FORMAT_TYPE_SINT, + VKD3D_FORMAT_TYPE_UINT, +}; + +struct vkd3d_format +{ + DXGI_FORMAT dxgi_format; + VkFormat vk_format; + size_t byte_count; + size_t 
block_width; + size_t block_height; + size_t block_byte_count; + VkImageAspectFlags vk_aspect_mask; + unsigned int plane_count; + enum vkd3d_format_type type; + bool is_emulated; +}; + +static inline size_t vkd3d_format_get_data_offset(const struct vkd3d_format *format, + unsigned int row_pitch, unsigned int slice_pitch, + unsigned int x, unsigned int y, unsigned int z) +{ + return z * slice_pitch + + (y / format->block_height) * row_pitch + + (x / format->block_width) * format->byte_count * format->block_byte_count; +} + +static inline bool vkd3d_format_is_compressed(const struct vkd3d_format *format) +{ + return format->block_byte_count != 1; +} + +void vkd3d_format_copy_data(const struct vkd3d_format *format, const uint8_t *src, + unsigned int src_row_pitch, unsigned int src_slice_pitch, uint8_t *dst, unsigned int dst_row_pitch, + unsigned int dst_slice_pitch, unsigned int w, unsigned int h, unsigned int d); + +const struct vkd3d_format *vkd3d_get_format(const struct d3d12_device *device, + DXGI_FORMAT dxgi_format, bool depth_stencil); +const struct vkd3d_format *vkd3d_find_uint_format(const struct d3d12_device *device, DXGI_FORMAT dxgi_format); + +HRESULT vkd3d_init_format_info(struct d3d12_device *device); +void vkd3d_cleanup_format_info(struct d3d12_device *device); + +static inline const struct vkd3d_format *vkd3d_format_from_d3d12_resource_desc( + const struct d3d12_device *device, const D3D12_RESOURCE_DESC *desc, DXGI_FORMAT view_format) +{ + return vkd3d_get_format(device, view_format ? 
view_format : desc->Format, + desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); +} + +static inline bool d3d12_box_is_empty(const D3D12_BOX *box) +{ + return box->right <= box->left || box->bottom <= box->top || box->back <= box->front; +} + +static inline unsigned int d3d12_resource_desc_get_width(const D3D12_RESOURCE_DESC *desc, + unsigned int miplevel_idx) +{ + return max(1, desc->Width >> miplevel_idx); +} + +static inline unsigned int d3d12_resource_desc_get_height(const D3D12_RESOURCE_DESC *desc, + unsigned int miplevel_idx) +{ + return max(1, desc->Height >> miplevel_idx); +} + +static inline unsigned int d3d12_resource_desc_get_depth(const D3D12_RESOURCE_DESC *desc, + unsigned int miplevel_idx) +{ + unsigned int d = desc->Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 1 : desc->DepthOrArraySize; + return max(1, d >> miplevel_idx); +} + +static inline unsigned int d3d12_resource_desc_get_layer_count(const D3D12_RESOURCE_DESC *desc) +{ + return desc->Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
desc->DepthOrArraySize : 1; +} + +static inline unsigned int d3d12_resource_desc_get_sub_resource_count(const D3D12_RESOURCE_DESC *desc) +{ + return d3d12_resource_desc_get_layer_count(desc) * desc->MipLevels; +} + +static inline unsigned int vkd3d_compute_workgroup_count(unsigned int thread_count, unsigned int workgroup_size) +{ + return (thread_count + workgroup_size - 1) / workgroup_size; +} + +VkCompareOp vk_compare_op_from_d3d12(D3D12_COMPARISON_FUNC op); +VkSampleCountFlagBits vk_samples_from_dxgi_sample_desc(const DXGI_SAMPLE_DESC *desc); +VkSampleCountFlagBits vk_samples_from_sample_count(unsigned int sample_count); + +bool is_valid_feature_level(D3D_FEATURE_LEVEL feature_level); + +bool is_valid_resource_state(D3D12_RESOURCE_STATES state); +bool is_write_resource_state(D3D12_RESOURCE_STATES state); + +HRESULT return_interface(void *iface, REFIID iface_iid, REFIID requested_iid, void **object); + +const char *debug_d3d12_box(const D3D12_BOX *box); +const char *debug_d3d12_shader_component_mapping(unsigned int mapping); +const char *debug_vk_extent_3d(VkExtent3D extent); +const char *debug_vk_memory_heap_flags(VkMemoryHeapFlags flags); +const char *debug_vk_memory_property_flags(VkMemoryPropertyFlags flags); +const char *debug_vk_queue_flags(VkQueueFlags flags); + +static inline void debug_ignored_node_mask(unsigned int mask) +{ + if (mask && mask != 1) + FIXME("Ignoring node mask 0x%08x.\n", mask); +} + +HRESULT vkd3d_load_vk_global_procs(struct vkd3d_vk_global_procs *procs, + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr); +HRESULT vkd3d_load_vk_instance_procs(struct vkd3d_vk_instance_procs *procs, + const struct vkd3d_vk_global_procs *global_procs, VkInstance instance); +HRESULT vkd3d_load_vk_device_procs(struct vkd3d_vk_device_procs *procs, + const struct vkd3d_vk_instance_procs *parent_procs, VkDevice device); + +extern const char vkd3d_build[]; + +bool vkd3d_get_program_name(char program_name[PATH_MAX]); + +static inline void 
vkd3d_set_thread_name(const char *name) +{ +#if defined(HAVE_PTHREAD_SETNAME_NP_2) + pthread_setname_np(pthread_self(), name); +#elif defined(HAVE_PTHREAD_SETNAME_NP_1) + pthread_setname_np(name); +#endif +} + +VkResult vkd3d_set_vk_object_name_utf8(struct d3d12_device *device, uint64_t vk_object, + VkDebugReportObjectTypeEXT vk_object_type, const char *name); +HRESULT vkd3d_set_vk_object_name(struct d3d12_device *device, uint64_t vk_object, + VkDebugReportObjectTypeEXT vk_object_type, const WCHAR *name); + +static inline void vk_prepend_struct(void *header, void *structure) +{ + VkBaseOutStructure *vk_header = header, *vk_structure = structure; + + assert(!vk_structure->pNext); + vk_structure->pNext = vk_header->pNext; + vk_header->pNext = vk_structure; +} + +static inline void vkd3d_prepend_struct(void *header, void *structure) +{ + struct + { + unsigned int type; + const void *next; + } *vkd3d_header = header, *vkd3d_structure = structure; + + assert(!vkd3d_structure->next); + vkd3d_structure->next = vkd3d_header->next; + vkd3d_header->next = vkd3d_structure; +} + +#endif /* __VKD3D_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_shaders.h b/libs/vkd3d/libs/vkd3d/vkd3d_shaders.h new file mode 100644 index 00000000000..b2a90cdbf3c --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/vkd3d_shaders.h @@ -0,0 +1,388 @@ +/* + * Copyright 2019 Philip Rebohle + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SHADERS_H +#define __VKD3D_SHADERS_H + +static const uint32_t cs_uav_clear_buffer_float_code[] = +{ +#if 0 + RWBuffer dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(128, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0xe114ba61, 0xff6a0d0b, 0x7b25c8f4, 0xfcf7cf22, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_buffer_uint_code[] = +{ +#if 0 + RWBuffer dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(128, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x3afd0cfd, 0x5145c166, 0x5b9f76b8, 0xa73775cd, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 
0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_array_float_code[] = +{ +#if 0 + RWTexture1DArray dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value; + } +#endif + 0x43425844, 0x3d73bc2d, 0x2b635f3d, 0x6bf98e92, 0xbe0aa5d9, 0x00000001, 0x0000011c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x04000036, 0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000, + 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t 
cs_uav_clear_1d_array_uint_code[] = +{ +#if 0 + RWTexture1DArray dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value; + } +#endif + 0x43425844, 0x2f0ca457, 0x72068b34, 0xd9dadc2b, 0xd3178c3e, 0x00000001, 0x0000011c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x04000036, 0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000, + 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_float_code[] = +{ +#if 0 + RWTexture1D dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x05266503, 0x4b97006f, 0x01a5cc63, 0xe617d0a1, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 
0x00005555, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_uint_code[] = +{ +#if 0 + RWTexture1D dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x19d5c8f2, 0x3ca4ac24, 0x9e258499, 0xf0463fd6, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_array_float_code[] = +{ +#if 0 + RWTexture2DArray dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy + thread_id.xy, 
thread_id.z)] = u_info.clear_value; + } +#endif + 0x43425844, 0x924d2d2c, 0xb9166376, 0x99f83871, 0x8ef65025, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_array_uint_code[] = +{ +#if 0 + RWTexture2DArray dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy + thread_id.xy, thread_id.z)] = u_info.clear_value; + } +#endif + 0x43425844, 0xa92219d4, 0xa2c5e47d, 0x0d308500, 0xf32197b4, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 
0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_float_code[] = +{ +#if 0 + RWTexture2D dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[u_info.dst_offset.xy + thread_id.xy] = u_info.clear_value; + } +#endif + 0x43425844, 0x6e735b3f, 0x7348c4fa, 0xb3634e42, 0x50e2d99b, 0x00000001, 0x00000128, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001, + 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, + 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_uint_code[] = +{ +#if 0 + RWTexture2D dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[u_info.dst_offset.xy + 
thread_id.xy] = u_info.clear_value; + } +#endif + 0x43425844, 0xf01db5dd, 0xc7dc5e55, 0xb017c1a8, 0x55abd52d, 0x00000001, 0x00000128, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001, + 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, + 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_3d_float_code[] = +{ +#if 0 + RWTexture3D dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value; + } +#endif + 0x43425844, 0x5d8f36a0, 0x30fa86a5, 0xfec7f2ef, 0xdfd76cbb, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 
0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_3d_uint_code[] = +{ +#if 0 + RWTexture3D dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value; + } +#endif + 0x43425844, 0x5b9c95b1, 0xc9bde4e3, 0x9aaff806, 0x24a1d264, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +#endif /* __VKD3D_SHADERS_H */ diff --git a/libs/vkd3d/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/libs/vkd3d/vulkan_procs.h new file mode 100644 index 00000000000..60556735f8b --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/vulkan_procs.h @@ -0,0 +1,218 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you 
can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef VK_INSTANCE_PFN +# define VK_INSTANCE_PFN(x) +#endif + +#ifndef VK_INSTANCE_EXT_PFN +# define VK_INSTANCE_EXT_PFN(x) +#endif + +#ifndef VK_DEVICE_PFN +# define VK_DEVICE_PFN(x) +#endif + +#ifndef VK_DEVICE_EXT_PFN +# define VK_DEVICE_EXT_PFN(x) +#endif + +/* Instance functions (obtained by vkGetInstanceProcAddr). */ +VK_INSTANCE_PFN(vkDestroyInstance) /* Load vkDestroyInstance() first. 
*/ +VK_INSTANCE_PFN(vkCreateDevice) +VK_INSTANCE_PFN(vkEnumerateDeviceExtensionProperties) +VK_INSTANCE_PFN(vkEnumerateDeviceLayerProperties) +VK_INSTANCE_PFN(vkEnumeratePhysicalDevices) +VK_INSTANCE_PFN(vkGetDeviceProcAddr) +VK_INSTANCE_PFN(vkGetPhysicalDeviceFeatures) +VK_INSTANCE_PFN(vkGetPhysicalDeviceFormatProperties) +VK_INSTANCE_PFN(vkGetPhysicalDeviceImageFormatProperties) +VK_INSTANCE_PFN(vkGetPhysicalDeviceMemoryProperties) +VK_INSTANCE_PFN(vkGetPhysicalDeviceProperties) +VK_INSTANCE_PFN(vkGetPhysicalDeviceQueueFamilyProperties) +VK_INSTANCE_PFN(vkGetPhysicalDeviceSparseImageFormatProperties) + +/* VK_KHR_get_physical_device_properties2 */ +VK_INSTANCE_EXT_PFN(vkGetPhysicalDeviceFeatures2KHR) +VK_INSTANCE_EXT_PFN(vkGetPhysicalDeviceProperties2KHR) + +/* VK_EXT_debug_report */ +VK_INSTANCE_EXT_PFN(vkCreateDebugReportCallbackEXT) +VK_INSTANCE_EXT_PFN(vkDestroyDebugReportCallbackEXT) + +/* VK_EXT_calibrated_timestamps */ +VK_INSTANCE_EXT_PFN(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) + +/* Device functions (obtained by vkGetDeviceProcAddr). */ +VK_DEVICE_PFN(vkDestroyDevice) /* Load vkDestroyDevice() first. 
*/ +VK_DEVICE_PFN(vkAllocateCommandBuffers) +VK_DEVICE_PFN(vkAllocateDescriptorSets) +VK_DEVICE_PFN(vkAllocateMemory) +VK_DEVICE_PFN(vkBeginCommandBuffer) +VK_DEVICE_PFN(vkBindBufferMemory) +VK_DEVICE_PFN(vkBindImageMemory) +VK_DEVICE_PFN(vkCmdBeginQuery) +VK_DEVICE_PFN(vkCmdBeginRenderPass) +VK_DEVICE_PFN(vkCmdBindDescriptorSets) +VK_DEVICE_PFN(vkCmdBindIndexBuffer) +VK_DEVICE_PFN(vkCmdBindPipeline) +VK_DEVICE_PFN(vkCmdBindVertexBuffers) +VK_DEVICE_PFN(vkCmdBlitImage) +VK_DEVICE_PFN(vkCmdClearAttachments) +VK_DEVICE_PFN(vkCmdClearColorImage) +VK_DEVICE_PFN(vkCmdClearDepthStencilImage) +VK_DEVICE_PFN(vkCmdCopyBuffer) +VK_DEVICE_PFN(vkCmdCopyBufferToImage) +VK_DEVICE_PFN(vkCmdCopyImage) +VK_DEVICE_PFN(vkCmdCopyImageToBuffer) +VK_DEVICE_PFN(vkCmdCopyQueryPoolResults) +VK_DEVICE_PFN(vkCmdDispatch) +VK_DEVICE_PFN(vkCmdDispatchIndirect) +VK_DEVICE_PFN(vkCmdDraw) +VK_DEVICE_PFN(vkCmdDrawIndexed) +VK_DEVICE_PFN(vkCmdDrawIndexedIndirect) +VK_DEVICE_PFN(vkCmdDrawIndirect) +VK_DEVICE_PFN(vkCmdEndQuery) +VK_DEVICE_PFN(vkCmdEndRenderPass) +VK_DEVICE_PFN(vkCmdExecuteCommands) +VK_DEVICE_PFN(vkCmdFillBuffer) +VK_DEVICE_PFN(vkCmdNextSubpass) +VK_DEVICE_PFN(vkCmdPipelineBarrier) +VK_DEVICE_PFN(vkCmdPushConstants) +VK_DEVICE_PFN(vkCmdResetEvent) +VK_DEVICE_PFN(vkCmdResetQueryPool) +VK_DEVICE_PFN(vkCmdResolveImage) +VK_DEVICE_PFN(vkCmdSetBlendConstants) +VK_DEVICE_PFN(vkCmdSetDepthBias) +VK_DEVICE_PFN(vkCmdSetDepthBounds) +VK_DEVICE_PFN(vkCmdSetEvent) +VK_DEVICE_PFN(vkCmdSetLineWidth) +VK_DEVICE_PFN(vkCmdSetScissor) +VK_DEVICE_PFN(vkCmdSetStencilCompareMask) +VK_DEVICE_PFN(vkCmdSetStencilReference) +VK_DEVICE_PFN(vkCmdSetStencilWriteMask) +VK_DEVICE_PFN(vkCmdSetViewport) +VK_DEVICE_PFN(vkCmdUpdateBuffer) +VK_DEVICE_PFN(vkCmdWaitEvents) +VK_DEVICE_PFN(vkCmdWriteTimestamp) +VK_DEVICE_PFN(vkCreateBuffer) +VK_DEVICE_PFN(vkCreateBufferView) +VK_DEVICE_PFN(vkCreateCommandPool) +VK_DEVICE_PFN(vkCreateComputePipelines) +VK_DEVICE_PFN(vkCreateDescriptorPool) 
+VK_DEVICE_PFN(vkCreateDescriptorSetLayout) +VK_DEVICE_PFN(vkCreateEvent) +VK_DEVICE_PFN(vkCreateFence) +VK_DEVICE_PFN(vkCreateFramebuffer) +VK_DEVICE_PFN(vkCreateGraphicsPipelines) +VK_DEVICE_PFN(vkCreateImage) +VK_DEVICE_PFN(vkCreateImageView) +VK_DEVICE_PFN(vkCreatePipelineCache) +VK_DEVICE_PFN(vkCreatePipelineLayout) +VK_DEVICE_PFN(vkCreateQueryPool) +VK_DEVICE_PFN(vkCreateRenderPass) +VK_DEVICE_PFN(vkCreateSampler) +VK_DEVICE_PFN(vkCreateSemaphore) +VK_DEVICE_PFN(vkCreateShaderModule) +VK_DEVICE_PFN(vkDestroyBuffer) +VK_DEVICE_PFN(vkDestroyBufferView) +VK_DEVICE_PFN(vkDestroyCommandPool) +VK_DEVICE_PFN(vkDestroyDescriptorPool) +VK_DEVICE_PFN(vkDestroyDescriptorSetLayout) +VK_DEVICE_PFN(vkDestroyEvent) +VK_DEVICE_PFN(vkDestroyFence) +VK_DEVICE_PFN(vkDestroyFramebuffer) +VK_DEVICE_PFN(vkDestroyImage) +VK_DEVICE_PFN(vkDestroyImageView) +VK_DEVICE_PFN(vkDestroyPipeline) +VK_DEVICE_PFN(vkDestroyPipelineCache) +VK_DEVICE_PFN(vkDestroyPipelineLayout) +VK_DEVICE_PFN(vkDestroyQueryPool) +VK_DEVICE_PFN(vkDestroyRenderPass) +VK_DEVICE_PFN(vkDestroySampler) +VK_DEVICE_PFN(vkDestroySemaphore) +VK_DEVICE_PFN(vkDestroyShaderModule) +VK_DEVICE_PFN(vkDeviceWaitIdle) +VK_DEVICE_PFN(vkEndCommandBuffer) +VK_DEVICE_PFN(vkFlushMappedMemoryRanges) +VK_DEVICE_PFN(vkFreeCommandBuffers) +VK_DEVICE_PFN(vkFreeDescriptorSets) +VK_DEVICE_PFN(vkFreeMemory) +VK_DEVICE_PFN(vkGetBufferMemoryRequirements) +VK_DEVICE_PFN(vkGetDeviceMemoryCommitment) +VK_DEVICE_PFN(vkGetDeviceQueue) +VK_DEVICE_PFN(vkGetEventStatus) +VK_DEVICE_PFN(vkGetFenceStatus) +VK_DEVICE_PFN(vkGetImageMemoryRequirements) +VK_DEVICE_PFN(vkGetImageSparseMemoryRequirements) +VK_DEVICE_PFN(vkGetImageSubresourceLayout) +VK_DEVICE_PFN(vkGetPipelineCacheData) +VK_DEVICE_PFN(vkGetQueryPoolResults) +VK_DEVICE_PFN(vkGetRenderAreaGranularity) +VK_DEVICE_PFN(vkInvalidateMappedMemoryRanges) +VK_DEVICE_PFN(vkMapMemory) +VK_DEVICE_PFN(vkMergePipelineCaches) +VK_DEVICE_PFN(vkQueueBindSparse) +VK_DEVICE_PFN(vkQueueSubmit) 
+VK_DEVICE_PFN(vkQueueWaitIdle) +VK_DEVICE_PFN(vkResetCommandBuffer) +VK_DEVICE_PFN(vkResetCommandPool) +VK_DEVICE_PFN(vkResetDescriptorPool) +VK_DEVICE_PFN(vkResetEvent) +VK_DEVICE_PFN(vkResetFences) +VK_DEVICE_PFN(vkSetEvent) +VK_DEVICE_PFN(vkUnmapMemory) +VK_DEVICE_PFN(vkUpdateDescriptorSets) +VK_DEVICE_PFN(vkWaitForFences) + +/* VK_KHR_draw_indirect_count */ +VK_DEVICE_EXT_PFN(vkCmdDrawIndirectCountKHR) +VK_DEVICE_EXT_PFN(vkCmdDrawIndexedIndirectCountKHR) + +/* VK_KHR_get_memory_requirements2 */ +VK_DEVICE_EXT_PFN(vkGetBufferMemoryRequirements2KHR) +VK_DEVICE_EXT_PFN(vkGetImageMemoryRequirements2KHR) +VK_DEVICE_EXT_PFN(vkGetImageSparseMemoryRequirements2KHR) + +/* VK_KHR_maintenance3 */ +VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutSupportKHR) + +/* VK_KHR_push_descriptor */ +VK_DEVICE_EXT_PFN(vkCmdPushDescriptorSetKHR) + +/* VK_EXT_calibrated_timestamps */ +VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT) + +/* VK_EXT_conditional_rendering */ +VK_DEVICE_EXT_PFN(vkCmdBeginConditionalRenderingEXT) +VK_DEVICE_EXT_PFN(vkCmdEndConditionalRenderingEXT) + +/* VK_EXT_debug_marker */ +VK_DEVICE_EXT_PFN(vkDebugMarkerSetObjectNameEXT) + +/* VK_EXT_transform_feedback */ +VK_DEVICE_EXT_PFN(vkCmdBeginQueryIndexedEXT) +VK_DEVICE_EXT_PFN(vkCmdBeginTransformFeedbackEXT) +VK_DEVICE_EXT_PFN(vkCmdBindTransformFeedbackBuffersEXT) +VK_DEVICE_EXT_PFN(vkCmdEndQueryIndexedEXT) +VK_DEVICE_EXT_PFN(vkCmdEndTransformFeedbackEXT) + +#undef VK_INSTANCE_PFN +#undef VK_INSTANCE_EXT_PFN +#undef VK_DEVICE_PFN +#undef VK_DEVICE_EXT_PFN