/*
 * Copyright 2016 Paul Gofman
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
#include "wine/port.h"

#include "d3dx9_private.h"

/* NOTE(review): the header name of this #include was missing in the reviewed
 * text. <limits.h> is used because parse_preshader() relies on UINT_MAX;
 * confirm against the upstream file. */
#include <limits.h>

WINE_DEFAULT_DEBUG_CHANNEL(d3dx);

/* Preshader operations; each value is an index into pres_op_info[]. */
enum pres_ops
{
    PRESHADER_OP_NOP,
    PRESHADER_OP_MOV,
};

/* Signature of a function computing one operation result. */
typedef double (*pres_op_func)(double *args, int ncomp);

/* "mov": the result is the first argument. */
static double pres_mov(double *args, int ncomp) {return args[0];}

/* Layout of the first word of an encoded instruction. */
#define PRES_OPCODE_MASK 0x7ff00000
#define PRES_OPCODE_SHIFT 20
#define PRES_SCALAR_FLAG 0x80000000
#define PRES_NCOMP_MASK  0x0000ffff

/* FOURCC tags identifying comment sections in the byte code. */
#define FOURCC_PRES 0x53455250
#define FOURCC_CLIT 0x54494c43
#define FOURCC_FXLC 0x434c5846
#define FOURCC_PRSI 0x49535250
#define PRES_SIGN 0x46580000

/* Static description of one supported operation. */
struct op_info
{
    unsigned int opcode;       /* opcode as stored in the instruction word (after mask and shift) */
    char mnem[8];              /* mnemonic used in diagnostics */
    unsigned int input_count;  /* number of input operands the instruction must carry */
    BOOL func_all_comps;
    pres_op_func func;
};

static const struct op_info pres_op_info[] =
{
    {0x000, "nop", 0, 0, NULL    }, /* PRESHADER_OP_NOP */
    {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
};

enum pres_value_type
{
    PRES_VT_FLOAT,
    PRES_VT_DOUBLE,
    PRES_VT_INT,
    PRES_VT_BOOL
};

/* Per register table storage layout: size of one component in bytes, number
 * of components per register and the component value type. */
static const struct
{
    unsigned int component_size;
    unsigned int reg_component_count;
    enum pres_value_type type;
}
table_info[] =
{
    {sizeof(double), 1, PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
    {sizeof(float),  4, PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
    {sizeof(float),  4, PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
    {sizeof(BOOL),   1, PRES_VT_BOOL  }, /* PRES_REGTAB_OBCONST */
    {sizeof(int),    4, PRES_VT_INT   }, /* PRES_REGTAB_OICONST */
    /* TODO: use double precision for 64 bit */
    {sizeof(float),  4, PRES_VT_FLOAT }  /* PRES_REGTAB_TEMP */
};

/* Register set -> register table mapping used for preshader inputs. */
static const enum pres_reg_tables pres_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_CONST,    /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,    /* D3DXRS_SAMPLER */
};

/* Register set -> register table mapping used for shader inputs. */
static const enum pres_reg_tables shad_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_OCONST,   /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,    /* D3DXRS_SAMPLER */
};

struct d3dx_pres_operand
{
    enum pres_reg_tables table;
    /* offset is component index, not register index, e. g.
       offset for component c3.y is 13 (3 * 4 + 1) */
    unsigned int offset;
};

#define MAX_INPUTS_COUNT 3

struct d3dx_pres_ins
{
    enum pres_ops op;
    /* first input argument is scalar,
       scalar component is propagated */
    BOOL scalar_op;
    unsigned int component_count;
    struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
    struct d3dx_pres_operand output;
};

/* Convert a component offset within a table into a register index. */
static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
{
    return offset / table_info[table].reg_component_count;
}

/* Number of registers tracked by one word of a "value set" bitmask. */
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)

/* Allocate zeroed storage and the "value set" bitmask for one register table.
 *
 * The table size in registers is taken from rs->table_sizes[table]; nothing
 * is allocated for an empty table. Returns E_OUTOFMEMORY if either allocation
 * fails (a pointer that was successfully allocated stays stored in rs and is
 * released by regstore_free_tables()), D3D_OK otherwise. */
static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
{
    unsigned int size;

    size = rs->table_sizes[table] * table_info[table].reg_component_count * table_info[table].component_size;
    if (size)
    {
        rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
        /* One bit per register, rounded up to whole bitmask words. */
        rs->table_value_set[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
                sizeof(*rs->table_value_set[table]) *
                ((rs->table_sizes[table] + PRES_BITMASK_BLOCK_SIZE - 1) / PRES_BITMASK_BLOCK_SIZE));
        if (!rs->tables[table] || !rs->table_value_set[table])
            return E_OUTOFMEMORY;
    }
    return D3D_OK;
}

/* Release the storage and bitmask of every register table. */
static void regstore_free_tables(struct d3dx_regstore *rs)
{
    unsigned int i;

    for (i = 0; i < PRES_REGTAB_COUNT; ++i)
    {
        HeapFree(GetProcessHeap(), 0, rs->tables[i]);
        HeapFree(GetProcessHeap(), 0, rs->table_value_set[i]);
    }
}

/* Copy `count` components from `data` into a register table, starting at
 * component `start_offset`, and mark every register touched by the copy as
 * set in the table's bitmask. Does nothing when `count` is zero. */
static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, void *data,
        unsigned int start_offset, unsigned int count)
{
    unsigned int block_idx, start, end, start_block, end_block;

    if (!count)
        return;

    memcpy((BYTE *)rs->tables[table] + start_offset * table_info[table].component_size,
        data, count * table_info[table].component_size);

    /* `start` becomes the bit index of the first register inside its word. */
    start = get_reg_offset(table, start_offset);
    start_block = start / PRES_BITMASK_BLOCK_SIZE;
    start -= start_block * PRES_BITMASK_BLOCK_SIZE;

    /* `end` becomes the number of bits above the last register inside its
     * word, i. e. the right shift producing a mask up to that register. */
    end = get_reg_offset(table, start_offset + count - 1);
    end_block = end / PRES_BITMASK_BLOCK_SIZE;
    end = (end_block + 1) * PRES_BITMASK_BLOCK_SIZE - 1 - end;

    if (start_block == end_block)
    {
        rs->table_value_set[table][start_block] |= (~0u << start) & (~0u >> end);
    }
    else
    {
        rs->table_value_set[table][start_block] |= ~0u << start;

        for (block_idx = start_block + 1; block_idx < end_block; ++block_idx)
            rs->table_value_set[table][block_idx] = ~0u;

        rs->table_value_set[table][end_block] |= ~0u >> end;
    }
}

/* Trace `size` bytes of byte code as 32 bit words, eight words per row. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *bytecode = (unsigned int *)data;
    unsigned int i, j, n;

    size /= sizeof(*bytecode);
    i = 0;
    while (i < size)
    {
        n = min(size - i, 8);
        for (j = 0; j < n; ++j)
            TRACE("0x%08x,", bytecode[i + j]);
        i += n;
        TRACE("\n");
    }
}

/* Search a run of comment sections for the one tagged with `fourcc`.
 *
 * `ptr` points to the first candidate comment token and `count` is the number
 * of words available from there. A comment token has 0xfffe in its low 16
 * bits and the section size in words in its high 16 bits; the word following
 * the token is the section's FOURCC.
 *
 * On success returns a pointer to the word following the FOURCC and stores
 * the section size as read from the token (it includes the FOURCC word) in
 * *size. Returns NULL, leaving *size untouched, when the section is not
 * found. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    /* Provide at least one value in comment section on non-NULL return. */
    while (count > 2 && (*ptr & 0xffff) == 0xfffe)
    {
        unsigned int section_size;

        section_size = (*ptr >> 16);
        if (!section_size || section_size + 1 > count)
            break;
        if (*(ptr + 1) == fourcc)
        {
            *size = section_size;
            return ptr + 2;
        }
        count -= section_size + 1;
        ptr += section_size + 1;
    }
    return NULL;
}

/* Decode one instruction operand (three words) into `opr`.
 *
 * The first word must be zero (anything else is reported as unsupported
 * relative addressing), the second selects the register table and the third
 * is the offset. Returns the pointer past the operand, or NULL if the buffer
 * is too short or the operand is not supported. */
static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
{
    static const enum pres_reg_tables reg_table[8] =
    {
        PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
        PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
    };

    if (count < 3)
    {
        WARN("Byte code buffer ends unexpectedly.\n");
        return NULL;
    }

    if (*ptr)
    {
        FIXME("Relative addressing not supported yet, word %#x.\n", *ptr);
        return NULL;
    }
    ++ptr;

    if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
    {
        FIXME("Unsupported register table %#x.\n", *ptr);
        return NULL;
    }

    opr->table = reg_table[*ptr++];
    opr->offset = *ptr++;

    /* Offsets stored for the output bool table are scaled down by 4. */
    if (opr->table == PRES_REGTAB_OBCONST)
        opr->offset /= 4;
    return ptr;
}

/* Decode one instruction: a header word (opcode, scalar flag, component
 * count), an input count word, the input operands and one output operand.
 *
 * Returns the pointer past the instruction, or NULL on a truncated buffer,
 * an unknown opcode, an unsupported component count or an input count that
 * does not match pres_op_info[]. */
static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
{
    unsigned int ins_code, ins_raw;
    unsigned int input_count;
    unsigned int i;

    if (count < 2)
    {
        WARN("Byte code buffer ends unexpectedly.\n");
        return NULL;
    }

    ins_raw = *ptr++;
    ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
    ins->component_count = ins_raw & PRES_NCOMP_MASK;
    ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);

    if (ins->component_count < 1 || ins->component_count > 4)
    {
        FIXME("Unsupported number of components %u.\n", ins->component_count);
        return NULL;
    }
    input_count = *ptr++;
    count -= 2;
    for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
        if (ins_code == pres_op_info[i].opcode)
            break;
    if (i == ARRAY_SIZE(pres_op_info))
    {
        FIXME("Unknown opcode %#x, raw %#x.\n", ins_code, ins_raw);
        return NULL;
    }
    ins->op = i;
    if (input_count > ARRAY_SIZE(ins->inputs) || input_count != pres_op_info[i].input_count)
    {
        FIXME("Actual input args %u, expected %u, instruction %s.\n", input_count,
                pres_op_info[i].input_count, pres_op_info[i].mnem);
        return NULL;
    }
    for (i = 0; i < input_count; ++i)
    {
        unsigned int *p;

        p = parse_pres_arg(ptr, count, &ins->inputs[i]);
        if (!p)
            return NULL;
        count -= p - ptr;
        ptr = p;
    }
    ptr = parse_pres_arg(ptr, count, &ins->output);
    return ptr;
}

/* Read the constant table of `byte_code` and resolve each constant to an
 * effect parameter by name.
 *
 * `out` is reset first (its regset2table member is not touched). Byte code
 * without a constant table is not an error: D3D_OK is returned with `out`
 * left empty. On success `out` owns the descriptor array, the parameter
 * pointer array and a reference to the constant table. On failure everything
 * acquired here is released and a failure code is returned. */
static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out, struct d3dx9_base_effect *base)
{
    ID3DXConstantTable *ctab;
    D3DXCONSTANT_DESC *cdesc;
    struct d3dx_parameter **inputs_param;
    D3DXCONSTANTTABLE_DESC desc;
    HRESULT hr;
    D3DXHANDLE hc;
    unsigned int i;
    unsigned int count;

    out->inputs = cdesc = NULL;
    out->ctab = NULL;
    out->inputs_param = NULL;
    out->input_count = 0;
    inputs_param = NULL;
    hr = D3DXGetShaderConstantTable(byte_code, &ctab);
    if (FAILED(hr) || !ctab)
    {
        TRACE("Could not get CTAB data, hr %#x.\n", hr);
        /* returning OK, shaders and preshaders without CTAB are valid */
        return D3D_OK;
    }
    hr = ID3DXConstantTable_GetDesc(ctab, &desc);
    if (FAILED(hr))
    {
        FIXME("Could not get CTAB desc, hr %#x.\n", hr);
        goto err_out;
    }

    cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
    inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
    if (!cdesc || !inputs_param)
    {
        hr = E_OUTOFMEMORY;
        goto err_out;
    }

    for (i = 0; i < desc.Constants; ++i)
    {
        hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
        if (!hc)
        {
            FIXME("Null constant handle.\n");
            /* hr still holds a success code from the previous call; set a
             * failure code so the error is not reported as D3D_OK. */
            hr = D3DXERR_INVALIDDATA;
            goto err_out;
        }
        count = 1;
        hr = ID3DXConstantTable_GetConstantDesc(ctab, hc, &cdesc[i], &count);
        if (FAILED(hr))
        {
            FIXME("Could not get constant desc, hr %#x.\n", hr);
            goto err_out;
        }
        inputs_param[i] = get_parameter_by_name(base, NULL, cdesc[i].Name);
        if (cdesc[i].Class == D3DXPC_OBJECT)
            TRACE("Object %s, parameter %p.\n", cdesc[i].Name, inputs_param[i]);
        else if (!inputs_param[i])
            ERR("Could not find parameter %s in effect.\n", cdesc[i].Name);
    }
    out->input_count = desc.Constants;
    out->inputs = cdesc;
    out->inputs_param = inputs_param;
    out->ctab = ctab;
    return D3D_OK;

err_out:
    HeapFree(GetProcessHeap(), 0, cdesc);
    HeapFree(GetProcessHeap(), 0, inputs_param);
    if (ctab)
        ID3DXConstantTable_Release(ctab);
    return hr;
}

/* Grow table_sizes[table] so that register `max_register` fits. Table
 * indices outside the valid range (e. g. PRES_REGTAB_COUNT) are ignored. */
static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
{
    if (table < PRES_REGTAB_COUNT)
        table_sizes[table] = max(table_sizes[table], max_register + 1);
}

/* Grow the table sizes so that every register range used by the constants
 * of `ctab` fits. Constants with a zero register count are skipped. */
static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
{
    unsigned int i, table, max_register;

    for (i = 0; i < ctab->input_count; ++i)
    {
        if (!ctab->inputs[i].RegisterCount)
            continue;
        max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
        table = ctab->regset2table[ctab->inputs[i].RegisterSet];
        update_table_size(table_sizes, table, max_register);
    }
}

/* Parse a preshader blob of `count` words starting at `ptr` into `pres`.
 *
 * The CLIT comment section (optional) supplies double constants for the
 * immediate table; the FXLC section supplies the instructions. A blob
 * without an FXLC section yields D3D_OK with no instructions. The
 * constant table is read through get_constants_desc(), register table sizes
 * are derived from the operands and constants, and the immediate table is
 * allocated and filled.
 *
 * On failure, resources already stored in `pres` are not released here; in
 * this file the caller releases them through d3dx_free_param_eval(). */
static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base)
{
    unsigned int *p;
    unsigned int i, j, const_count;
    double *dconst;
    HRESULT hr;
    unsigned int saved_word;
    unsigned int section_size;

    /* Validate the size before reading the first word. */
    if (!count)
    {
        WARN("Unexpected end of byte code buffer.\n");
        return D3DXERR_INVALIDDATA;
    }
    TRACE("Preshader version %#x.\n", *ptr & 0xffff);

    p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
    if (p)
    {
        const_count = *p++;
        if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
        {
            WARN("Byte code buffer ends unexpectedly.\n");
            return D3DXERR_INVALIDDATA;
        }
        dconst = (double *)p;
    }
    else
    {
        const_count = 0;
        dconst = NULL;
    }
    TRACE("%u double constants.\n", const_count);

    p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
    if (!p)
    {
        WARN("Could not find preshader code.\n");
        return D3D_OK;
    }
    pres->ins_count = *p++;
    --section_size;
    /* Guard the allocation size computation against overflow. */
    if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
    {
        WARN("Invalid instruction count %u.\n", pres->ins_count);
        return D3DXERR_INVALIDDATA;
    }
    TRACE("%u instructions.\n", pres->ins_count);
    pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
    if (!pres->ins)
        return E_OUTOFMEMORY;
    for (i = 0; i < pres->ins_count; ++i)
    {
        unsigned int *ptr_next;

        ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
        if (!ptr_next)
            return D3DXERR_INVALIDDATA;
        section_size -= ptr_next - p;
        p = ptr_next;
    }

    /* The first word is temporarily replaced while querying the constant
     * table and restored afterwards (presumably so that the blob is accepted
     * as shader byte code by D3DXGetShaderConstantTable() - TODO confirm). */
    saved_word = *ptr;
    *ptr = 0xfffe0000;
    hr = get_constants_desc(ptr, &pres->inputs, base);
    *ptr = saved_word;
    if (FAILED(hr))
        return hr;

    pres->inputs.regset2table = pres_regset2table;

    pres->regs.table_sizes[PRES_REGTAB_IMMED] = const_count;

    /* Size every table to hold the last component each operand touches. */
    for (i = 0; i < pres->ins_count; ++i)
    {
        for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
            update_table_size(pres->regs.table_sizes, pres->ins[i].inputs[j].table,
                    get_reg_offset(pres->ins[i].inputs[j].table,
                    pres->ins[i].inputs[j].offset + pres->ins[i].component_count - 1));

        update_table_size(pres->regs.table_sizes, pres->ins[i].output.table,
                get_reg_offset(pres->ins[i].output.table,
                pres->ins[i].output.offset + pres->ins[i].component_count - 1));
    }
    update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
    if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
        return E_OUTOFMEMORY;
    regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);

    return D3D_OK;
}

/* Create a parameter evaluator from effect byte code.
 *
 * For vertex and pixel shader parameters the byte code must start with a
 * shader version token; the shader constant table is read and the preshader
 * is looked up in the PRES comment section (a shader without one is valid).
 * For any other parameter type the whole blob is parsed as a preshader.
 *
 * *peval_out receives the new evaluator, or NULL if `byte_code` is NULL,
 * smaller than one word, or an error occurred. */
void d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
        D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out)
{
    struct d3dx_param_eval *peval;
    unsigned int *ptr;
    HRESULT hr;
    unsigned int i;
    BOOL shader;
    unsigned int count, pres_size;

    TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
            base_effect, byte_code, byte_code_size, type, peval_out);

    count = byte_code_size / sizeof(unsigned int);
    if (!byte_code || !count)
    {
        *peval_out = NULL;
        return;
    }

    peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
    if (!peval)
        goto err_out;

    peval->param_type = type;
    switch (type)
    {
        case D3DXPT_VERTEXSHADER:
        case D3DXPT_PIXELSHADER:
            shader = TRUE;
            break;
        default:
            shader = FALSE;
            break;
    }
    peval->shader_inputs.regset2table = shad_regset2table;

    ptr = (unsigned int *)byte_code;
    if (shader)
    {
        if ((*ptr & 0xfffe0000) != 0xfffe0000)
        {
            FIXME("Invalid shader signature %#x.\n", *ptr);
            goto err_out;
        }
        TRACE("Shader version %#x.\n", *ptr & 0xffff);

        if (FAILED(hr = get_constants_desc(ptr, &peval->shader_inputs, base_effect)))
        {
            FIXME("Could not get shader constant table, hr %#x.\n", hr);
            goto err_out;
        }
        update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
        ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
        if (!ptr)
            TRACE("No preshader found.\n");
    }
    else
    {
        pres_size = count;
    }

    if (ptr && FAILED(parse_preshader(&peval->pres, ptr, pres_size, base_effect)))
    {
        FIXME("Failed parsing preshader, byte code for analysis follows.\n");
        dump_bytecode(byte_code, byte_code_size);
        goto err_out;
    }

    /* The immediate table is allocated by parse_preshader(); allocate the
     * remaining tables here. */
    for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
    {
        if (FAILED(regstore_alloc_table(&peval->pres.regs, i)))
            goto err_out;
    }

    if (TRACE_ON(d3dx))
    {
        dump_bytecode(byte_code, byte_code_size);
    }
    *peval_out = peval;
    TRACE("Created parameter evaluator %p.\n", *peval_out);
    return;

err_out:
    FIXME("Error creating parameter evaluator.\n");
    /* peval may be NULL here; d3dx_free_param_eval() accepts that. */
    d3dx_free_param_eval(peval);
    *peval_out = NULL;
}

/* Release the arrays and the constant table reference held by `ctab`. */
static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
{
    HeapFree(GetProcessHeap(), 0, ctab->inputs);
    HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
    if (ctab->ctab)
        ID3DXConstantTable_Release(ctab->ctab);
}

/* Release the instructions, register tables and inputs of a preshader. */
static void d3dx_free_preshader(struct d3dx_preshader *pres)
{
    HeapFree(GetProcessHeap(), 0, pres->ins);

    regstore_free_tables(&pres->regs);
    d3dx_free_const_tab(&pres->inputs);
}

/* Destroy a parameter evaluator; a NULL `peval` is ignored. */
void d3dx_free_param_eval(struct d3dx_param_eval *peval)
{
    TRACE("peval %p.\n", peval);

    if (!peval)
        return;

    d3dx_free_preshader(&peval->pres);
    d3dx_free_const_tab(&peval->shader_inputs);
    HeapFree(GetProcessHeap(), 0, peval);
}