Move texture sampling and LOD selection into shader/textures.glsl.

Summary of the hunks below:
  * common.py: the default scene render state is now built with
    tex_size=(32, 32).
  * material/parser.py: prim_lod is stored through quantize_tuple(..., 8)
    instead of as a raw tuple.
  * renderer.py: shader sources are assembled from per-stage file lists into
    StringIO buffers, with a newline written after every file; textures.glsl
    is placed in the fragment stage ahead of main3d.frag.glsl.
  * shader/main3d.frag.glsl: the quantize*/sampleSampler/sampleIndex/computeLOD
    definitions are removed; main() now computes the UV derivatives itself and
    passes them to the new computeLOD().
  * shader/textures.glsl: new file holding the moved helpers (including
    mirrorUV/wrappedMirrorSample from utils.glsl) and the rewritten
    computeLOD().
  * shader/utils.glsl: textLOD() now evaluates to a bool; mirrorUV and
    wrappedMirrorSample are removed (moved to textures.glsl).

Review fix applied to computeLOD(): the mip base is now derived from
max(maxDist, 1.0). The previous form converted floor(log2(maxDist)) straight
to uint, which is undefined for maxDist < 1.0 (negative float -> uint) and for
maxDist == 0.0 (log2 of zero). The new-file hunk header was updated from
+1,150 to +1,156 to account for the added comment lines.

NOTE(review): indentation and blank context lines in this patch were
reconstructed; if a hunk does not apply cleanly, retry with whitespace
tolerance (e.g. `git apply --ignore-whitespace`) and confirm against the tree.

diff --git a/common.py b/common.py
index 9940bf0..ee4c94e 100644
--- a/common.py
+++ b/common.py
@@ -69,6 +69,7 @@ def get_scene_render_state(scene: bpy.types.Scene):
         convert=quantize_tuple(f64render_rs.default_convert, 9.0, -1.0, 1.0),
         cc=SOLID_CC,
         tex_confs=([get_tile_conf(getattr(f64render_rs, f"default_tex{i}")) for i in range(0, 8)]),
+        tex_size=(32, 32),
     )
     state.lights[0] = F64Light(
         quantize_srgb(fast64_rs.light0Color, force_alpha=True), quantize_direction(fast64_rs.light0Direction)
diff --git a/material/parser.py b/material/parser.py
index a7d1cc2..906114f 100644
--- a/material/parser.py
+++ b/material/parser.py
@@ -394,7 +394,7 @@ def f64_material_parse(f3d_mat: "F3DMaterialProperty", always_set: bool, set_lig
         state.cc = get_cc_settings(f3d_mat)
     if always_set or (f3d_mat.set_prim and cc_uses["Primitive"]):
         state.prim_color = quantize_srgb(f3d_mat.prim_color)
-        state.prim_lod = (f3d_mat.prim_lod_frac, f3d_mat.prim_lod_min)
+        state.prim_lod = quantize_tuple((f3d_mat.prim_lod_frac, f3d_mat.prim_lod_min), 8)
     if always_set or (f3d_mat.set_env and cc_uses["Environment"]):
         state.env_color = quantize_srgb(f3d_mat.env_color)
     if always_set or (f3d_mat.set_key and cc_uses["Key"]):  # extra 0 for alignment
diff --git a/renderer.py b/renderer.py
index 39187c1..57ae969 100644
--- a/renderer.py
+++ b/renderer.py
@@ -1,3 +1,4 @@
+from io import StringIO
 import math
 import pathlib
 import time
@@ -97,24 +98,24 @@ def init_shader(self, scene: bpy.types.Scene):
         print("Compiling shader")
 
         shaderPath = (pathlib.Path(__file__).parent / "shader").resolve()
-        shaderVert = ""
-        shaderFrag = ""
-
-        with open(shaderPath / "utils.glsl", "r", encoding="utf-8") as f:
-            shaderUtils = f.read()
-            shaderVert += shaderUtils
-            shaderFrag += shaderUtils
-
-        with open(shaderPath / "defines.glsl", "r", encoding="utf-8") as f:
-            shaderDef = f.read()
-            shaderVert += shaderDef
-            shaderFrag += shaderDef
-
-        with open(shaderPath / "main3d.vert.glsl", "r", encoding="utf-8") as f:
-            shaderVert += f.read()
+        shaderVert = StringIO()
+        shaderFrag = StringIO()
+
+        general_shaders = ("utils.glsl", "defines.glsl")
+        vertex_shaders = ("main3d.vert.glsl",)
+        frag_shaders = (
+            "textures.glsl",
+            "main3d.frag.glsl",
+        )
 
-        with open(shaderPath / "main3d.frag.glsl", "r", encoding="utf-8") as f:
-            shaderFrag += f.read()
+        for shader in general_shaders + vertex_shaders:
+            with open(shaderPath / shader, "r", encoding="utf-8") as f:
+                shaderVert.write(f.read())
+                shaderVert.write("\n")
+        for shader in general_shaders + frag_shaders:
+            with open(shaderPath / shader, "r", encoding="utf-8") as f:
+                shaderFrag.write(f.read())
+                shaderFrag.write("\n")
 
         shader_info = gpu.types.GPUShaderCreateInfo()
 
@@ -158,8 +159,8 @@ def init_shader(self, scene: bpy.types.Scene):
         else:
             shader_info.fragment_out(0, "VEC4", "FragColor")
 
-        shader_info.vertex_source(shaderVert)
-        shader_info.fragment_source(shaderFrag)
+        shader_info.vertex_source(shaderVert.getvalue())
+        shader_info.fragment_source(shaderFrag.getvalue())
 
         self.shader = gpu.shader.create_from_info(shader_info)
         self.shader_fallback = gpu.shader.from_builtin(
diff --git a/shader/main3d.frag.glsl b/shader/main3d.frag.glsl
index 45e4e74..f5c0fba 100644
--- a/shader/main3d.frag.glsl
+++ b/shader/main3d.frag.glsl
@@ -3,129 +3,9 @@
   #extension GL_ARB_fragment_shader_interlock : enable
   layout(pixel_interlock_unordered) in;
 #endif
-#ifdef GL_ARB_derivative_control
-  #extension GL_ARB_derivative_control : enable
-#endif
 
 #define DECAL_DEPTH_DELTA 100
 
-vec4 quantize3Bit(in vec4 color) {
-  return vec4(round(color.rgb * 8.0) / 8.0, step(0.5, color.a));
-}
-
-vec4 quantize4Bit(in vec4 color) {
-  return round(color * 16.0) / 16.0; // (16 seems more accurate than 15)
-}
-
-vec4 quantizeTexture(uint flags, vec4 color) {
-  vec4 colorQuant = flagSelect(flags, TEX_FLAG_4BIT, color, quantize4Bit(color));
-  colorQuant = flagSelect(flags, TEX_FLAG_3BIT, colorQuant, quantize3Bit(colorQuant));
-  colorQuant.rgb = linearToGamma(colorQuant.rgb);
-  return flagSelect(flags, TEX_FLAG_MONO, colorQuant.rgba, colorQuant.rrrr);
-}
-
-vec4 sampleSampler(in const sampler2D tex, in const TileConf tileConf, in vec2 uvCoord, in const uint texFilter) {
-  // https://github.com/rt64/rt64/blob/61aa08f517cd16c1dbee4e097768b08e2a060307/src/shaders/TextureSampler.hlsli#L156-L276
-  const ivec2 texSize = textureSize(tex, 0);
-
-  uvCoord *= tileConf.shift;
-
-#ifdef SIMULATE_LOW_PRECISION
-  // Simulates the lower precision of the hardware's coordinate interpolation.
-  uvCoord = round(uvCoord * LOW_PRECISION) / LOW_PRECISION;
-#endif
-
-  uvCoord -= tileConf.low;
-
-  const vec2 isClamp = step(tileConf.mask, vec2(1.0)); // if mask is negated, clamp
-  const vec2 isMirror = step(tileConf.high, vec2(0.0)); // if high is negated, mirror
-  const vec2 mask = abs(tileConf.mask);
-  const vec2 highMinusLow = abs(tileConf.high) - abs(tileConf.low);
-
-  if (texFilter != G_TF_POINT) {
-    uvCoord -= 0.5 * tileConf.shift;
-    const vec2 texelBaseInt = floor(uvCoord);
-    const vec4 sample00 = wrappedMirrorSample(tex, texelBaseInt, mask, highMinusLow, isClamp, isMirror);
-    const vec4 sample01 = wrappedMirrorSample(tex, texelBaseInt + vec2(0, 1), mask, highMinusLow, isClamp, isMirror);
-    const vec4 sample10 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 0), mask, highMinusLow, isClamp, isMirror);
-    const vec4 sample11 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 1), mask, highMinusLow, isClamp, isMirror);
-    const vec2 fracPart = uvCoord - texelBaseInt;
-#ifdef USE_LINEAR_FILTER
-    return quantizeTexture(tileConf.flags, mix(mix(sample00, sample10, fracPart.x), mix(sample01, sample11, fracPart.x), fracPart.y));
-#else
-    if (texFilter == G_TF_AVERAGE && all(lessThanEqual(vec2(1 / LOW_PRECISION), abs(fracPart - 0.5)))) {
-      return quantizeTexture(tileConf.flags, (sample00 + sample01 + sample10 + sample11) / 4.0f);
-    }
-    else {
-      // Originally written by ArthurCarvalho
-      // Sourced from https://www.emutalk.net/threads/emulating-nintendo-64-3-sample-bilinear-filtering-using-shaders.54215/
-      vec4 tri0 = mix(sample00, sample10, fracPart.x) + (sample01 - sample00) * fracPart.y;
-      vec4 tri1 = mix(sample11, sample01, 1.0 - fracPart.x) + (sample10 - sample11) * (1.0 - fracPart.y);
-      return quantizeTexture(tileConf.flags, mix(tri0, tri1, step(1.0, fracPart.x + fracPart.y)));
-    }
-#endif
-  }
-  else {
-    return quantizeTexture(tileConf.flags, wrappedMirrorSample(tex, ivec2(floor(uvCoord)), mask, highMinusLow, isClamp, isMirror));
-  }
-}
-
-vec4 sampleIndex(in const uint textureIndex, in const vec2 uvCoord, in const uint texFilter) {
-  TileConf tileConf = material.texConfs[textureIndex];
-  switch (textureIndex) {
-    default: return sampleSampler(tex0, tileConf, uvCoord, texFilter);
-    case 1: return sampleSampler(tex1, tileConf, uvCoord, texFilter);
-    case 2: return sampleSampler(tex2, tileConf, uvCoord, texFilter);
-    case 3: return sampleSampler(tex3, tileConf, uvCoord, texFilter);
-    case 4: return sampleSampler(tex4, tileConf, uvCoord, texFilter);
-    case 5: return sampleSampler(tex5, tileConf, uvCoord, texFilter);
-    case 6: return sampleSampler(tex6, tileConf, uvCoord, texFilter);
-    case 7: return sampleSampler(tex7, tileConf, uvCoord, texFilter);
-  }
-}
-
-float computeLOD(inout uint tileIndex0, inout uint tileIndex1) {
-  // https://github.com/rt64/rt64/blob/0ca92eeb6c2f58ce3581c65f87f7261b8ac0fea0/src/shaders/TextureSampler.hlsli#L18
-  if (textLOD() == G_TL_TILE)
-    return 1.0f;
-  const uint texDetail = textDetail();
-  const bool lodSharpen = texDetail == G_TD_SHARPEN;
-  const bool lodDetail = texDetail == G_TD_DETAIL;
-  const bool lodSharpDetail = lodSharpen || lodDetail;
-
-#ifdef GL_ARB_derivative_control
-  const vec2 dfd = abs(vec2(dFdxCoarse(inputUV.x), dFdyCoarse(inputUV.y)));
-#else
-  const vec2 dfd = abs(vec2(dFdx(inputUV.x), dFdy(inputUV.y)));
-#endif
-  float maxDst = max(dfd.x, dfd.y);
-
-  if (lodSharpDetail)
-    maxDst = max(maxDst, material.primLod.y);
-
-  int tileBase = int(floor(log2(maxDst)));
-  float lodFraction = maxDst / pow(2, max(tileBase, 0)) - 1.0;
-
-  if (lodSharpen && maxDst < 1.0)
-    lodFraction = maxDst - 1.0;
-
-  if (lodDetail) {
-    if (lodFraction < 0.0)
-      lodFraction = maxDst;
-    tileBase += 1;
-  } else if (tileBase >= material.mipCount)
-    lodFraction = 1.0;
-
-  if (lodSharpDetail)
-    tileBase = max(tileBase, 0);
-  else
-    lodFraction = max(lodFraction, 0.0);
-
-  tileIndex0 = clamp(tileBase, 0, material.mipCount);
-  tileIndex1 = clamp(tileBase + 1, 0, material.mipCount);
-  return lodFraction;
-}
-
 vec3 cc_fetchColor(in int val, in vec4 shade, in vec4 comb, in float lodFraction, in vec4 texData0, in vec4 texData1)
 {
   if(val == CC_C_COMB ) return comb.rgb;
@@ -264,9 +144,18 @@ void main()
 
   vec4 ccShade = geoModeSelect(G_SHADE_SMOOTH, cc_shade_flat, cc_shade);
 
+#ifdef GL_ARB_derivative_control
+  const vec2 dx = abs(vec2(dFdxCoarse(inputUV.x), dFdyCoarse(inputUV.x)));
+  const vec2 dy = abs(vec2(dFdxCoarse(inputUV.y), dFdyCoarse(inputUV.y)));
+#else
+  const vec2 dx = abs(vec2(dFdx(inputUV.x), dFdy(inputUV.x)));
+  const vec2 dy = abs(vec2(dFdx(inputUV.y), dFdy(inputUV.y)));
+#endif
+
   uint tex0Index = 0;
   uint tex1Index = 1;
-  const float lodFraction = computeLOD(tex0Index, tex1Index);
+  float lodFraction = 0.0;
+  computeLOD(tex0Index, tex1Index, textLOD(), textDetail(), material.primLod.y, dx, dy, false, lodFraction);
 
   vec4 texData0 = sampleIndex(tex0Index, inputUV, texFilter);
   vec4 texData1 = sampleIndex(tex1Index, inputUV, texFilter);
diff --git a/shader/textures.glsl b/shader/textures.glsl
new file mode 100644
index 0000000..ad9893e
--- /dev/null
+++ b/shader/textures.glsl
@@ -0,0 +1,156 @@
+#ifdef GL_ARB_derivative_control
+  #extension GL_ARB_derivative_control : enable
+#endif
+
+vec4 quantize3Bit(in vec4 color) {
+  return vec4(round(color.rgb * 8.0) / 8.0, step(0.5, color.a));
+}
+
+vec4 quantize4Bit(in vec4 color) {
+  return round(color * 16.0) / 16.0; // (16 seems more accurate than 15)
+}
+
+vec4 quantizeTexture(uint flags, vec4 color) {
+  vec4 colorQuant = flagSelect(flags, TEX_FLAG_4BIT, color, quantize4Bit(color));
+  colorQuant = flagSelect(flags, TEX_FLAG_3BIT, colorQuant, quantize3Bit(colorQuant));
+  colorQuant.rgb = linearToGamma(colorQuant.rgb);
+  return flagSelect(flags, TEX_FLAG_MONO, colorQuant.rgba, colorQuant.rrrr);
+}
+
+vec2 mirrorUV(const vec2 uvIn, const vec2 uvBound)
+{
+  vec2 uvMod2 = mod(uvIn, uvBound * 2.0 + 1.0);
+  return mix(uvMod2, (uvBound * 2.0) - uvMod2, step(uvBound, uvMod2));
+}
+
+vec4 wrappedMirrorSample(const sampler2D tex, vec2 uv, const vec2 mask, const vec2 highMinusLow, const vec2 isClamp, const vec2 isMirror)
+{
+  const ivec2 texSize = textureSize(tex, 0);
+
+  // first apply clamping if enabled (clamp S/T, low S/T -> high S/T)
+  const vec2 uvClamp = clamp(uv, vec2(0.0), highMinusLow);
+  uv = mix(uv, uvClamp, isClamp);
+
+  // then mirror the result if needed (mirror S/T)
+  const vec2 uvMirror = mirrorUV(uv, mask - 0.5);
+  uv = mix(uv, uvMirror, isMirror);
+
+  // clamp again (mask S/T), this is also done to avoid OOB texture access
+  uv = mod(uv, min(texSize, mask));
+
+  return texelFetch(tex, ivec2(floor(uv)), 0);
+}
+
+vec4 sampleSampler(in const sampler2D tex, in const TileConf tileConf, in vec2 uvCoord, in const uint texFilter) {
+  // https://github.com/rt64/rt64/blob/61aa08f517cd16c1dbee4e097768b08e2a060307/src/shaders/TextureSampler.hlsli#L156-L276
+  const ivec2 texSize = textureSize(tex, 0);
+
+  uvCoord *= tileConf.shift;
+
+#ifdef SIMULATE_LOW_PRECISION
+  // Simulates the lower precision of the hardware's coordinate interpolation.
+  uvCoord = round(uvCoord * LOW_PRECISION) / LOW_PRECISION;
+#endif
+
+  uvCoord -= tileConf.low;
+
+  const vec2 isClamp = step(tileConf.mask, vec2(1.0)); // if mask is negated, clamp
+  const vec2 isMirror = step(tileConf.high, vec2(0.0)); // if high is negated, mirror
+  const vec2 mask = abs(tileConf.mask);
+  const vec2 highMinusLow = abs(tileConf.high) - abs(tileConf.low);
+
+  if (texFilter != G_TF_POINT) {
+    uvCoord -= 0.5 * tileConf.shift;
+    const vec2 texelBaseInt = floor(uvCoord);
+    const vec4 sample00 = wrappedMirrorSample(tex, texelBaseInt, mask, highMinusLow, isClamp, isMirror);
+    const vec4 sample01 = wrappedMirrorSample(tex, texelBaseInt + vec2(0, 1), mask, highMinusLow, isClamp, isMirror);
+    const vec4 sample10 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 0), mask, highMinusLow, isClamp, isMirror);
+    const vec4 sample11 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 1), mask, highMinusLow, isClamp, isMirror);
+    const vec2 fracPart = uvCoord - texelBaseInt;
+#ifdef USE_LINEAR_FILTER
+    return quantizeTexture(tileConf.flags, mix(mix(sample00, sample10, fracPart.x), mix(sample01, sample11, fracPart.x), fracPart.y));
+#else
+    if (texFilter == G_TF_AVERAGE && all(lessThanEqual(vec2(1 / LOW_PRECISION), abs(fracPart - 0.5)))) {
+      return quantizeTexture(tileConf.flags, (sample00 + sample01 + sample10 + sample11) / 4.0f);
+    }
+    else {
+      // Originally written by ArthurCarvalho
+      // Sourced from https://www.emutalk.net/threads/emulating-nintendo-64-3-sample-bilinear-filtering-using-shaders.54215/
+      vec4 tri0 = mix(sample00, sample10, fracPart.x) + (sample01 - sample00) * fracPart.y;
+      vec4 tri1 = mix(sample11, sample01, 1.0 - fracPart.x) + (sample10 - sample11) * (1.0 - fracPart.y);
+      return quantizeTexture(tileConf.flags, mix(tri0, tri1, step(1.0, fracPart.x + fracPart.y)));
+    }
+#endif
+  }
+  else {
+    return quantizeTexture(tileConf.flags, wrappedMirrorSample(tex, ivec2(floor(uvCoord)), mask, highMinusLow, isClamp, isMirror));
+  }
+}
+
+vec4 sampleIndex(in const uint textureIndex, in const vec2 uvCoord, in const uint texFilter) {
+  TileConf tileConf = material.texConfs[textureIndex];
+  switch (textureIndex) {
+    default: return sampleSampler(tex0, tileConf, uvCoord, texFilter);
+    case 1: return sampleSampler(tex1, tileConf, uvCoord, texFilter);
+    case 2: return sampleSampler(tex2, tileConf, uvCoord, texFilter);
+    case 3: return sampleSampler(tex3, tileConf, uvCoord, texFilter);
+    case 4: return sampleSampler(tex4, tileConf, uvCoord, texFilter);
+    case 5: return sampleSampler(tex5, tileConf, uvCoord, texFilter);
+    case 6: return sampleSampler(tex6, tileConf, uvCoord, texFilter);
+    case 7: return sampleSampler(tex7, tileConf, uvCoord, texFilter);
+  }
+}
+
+// Computes the LOD fraction and, when textLOD is set, offsets the two tile indices.
+// dx / dy are the absolute screen-space derivatives of the S and T coordinates.
+// minLod is the lower bound applied to the fraction (the caller passes material.primLod.y).
+// lodFrac is always written; tileIndex0 / tileIndex1 are only modified when textLOD is true.
+void computeLOD(
+  inout uint tileIndex0,
+  inout uint tileIndex1,
+  const bool textLOD,
+  const uint textDetail,
+  const float minLod,
+  const vec2 dx,
+  const vec2 dy,
+  const bool perspectiveOverflow, // this should be possible from what I've read in parallel-rdp, can always be removed
+  out float lodFrac
+) {
+  const bool sharpen = textDetail == G_TD_SHARPEN;
+  const bool detail = textDetail == G_TD_DETAIL;
+  const bool clam = textDetail == G_TD_CLAMP;
+
+  const vec2 dfd = max(dx, dy);
+  // TODO: should this value be scaled by clipping planes?
+  const float maxDist = max(dfd.x, dfd.y);
+
+  // Clamp to >= 1.0 first: log2() is undefined for 0 and converting a negative
+  // float to uint is undefined, so magnified samples must map to mip 0.
+  const uint mipBase = uint(floor(log2(max(maxDist, 1.0))));
+  const bool distant = perspectiveOverflow || maxDist >= 16384.0;
+  const bool aboveCount = mipBase >= material.mipCount;
+  const bool maxDistant = distant || aboveCount;
+  const bool magnify = maxDist < 1.0;
+
+  const float detailFrac = max(minLod, maxDist) - float(sharpen);
+  const float magnifedFrac = mix(float(maxDistant), detailFrac, float(!clam));
+  const float distantFrac = float(distant || (aboveCount && clam));
+  const float notClampedFrac = max(maxDist / pow(2, max(mipBase, 0)) - 1.0, minLod);
+
+  const float notMagnifedFrac = mix(distantFrac, notClampedFrac, !maxDistant || !clam);
+  lodFrac = mix(notMagnifedFrac, magnifedFrac, float(!distant && magnify));
+
+  if (textLOD) {
+    const uint tileOffset = maxDistant ? material.mipCount : (mipBase * int(!(maxDistant && clam)));
+    tileIndex0 = tileIndex0 + tileOffset;
+    tileIndex1 = tileIndex0;
+    if (detail) {
+      tileIndex1 += (int(!(maxDistant || magnify)) + 1);
+      tileIndex0 += int(!magnify);
+    } else {
+      tileIndex1 += uint(!maxDistant && (sharpen || !magnify));
+    }
+    tileIndex0 &= 7;
+    tileIndex1 &= 7;
+  }
+}
diff --git a/shader/utils.glsl b/shader/utils.glsl
index 74a5c74..851d275 100644
--- a/shader/utils.glsl
+++ b/shader/utils.glsl
@@ -26,7 +26,7 @@ vec3 linearToGamma(in vec3 color) {
 #define cycleType() (OTHER_MODE_H & (3 << G_MDSFT_CYCLETYPE))
 #define texFilter() (OTHER_MODE_H & (3 << G_MDSFT_TEXTFILT))
 #define textPersp() (OTHER_MODE_H & (1 << G_MDSFT_TEXTPERSP))
-#define textLOD() (OTHER_MODE_H & (1 << G_MDSFT_TEXTLOD))
+#define textLOD() (bool(OTHER_MODE_H & (1 << G_MDSFT_TEXTLOD)))
 #define textDetail()(OTHER_MODE_H & (3 << G_MDSFT_TEXTDETAIL))
 
 #define boolSelect(cond, a, b) (bool(mix(a, b, cond)))
@@ -35,27 +35,3 @@ float noise(in vec2 uv)
 {
   return fract(sin(dot(uv, vec2(12.9898, 78.233)))* 43758.5453);
 }
-
-vec2 mirrorUV(const vec2 uvIn, const vec2 uvBound)
-{
-  vec2 uvMod2 = mod(uvIn, uvBound * 2.0 + 1.0);
-  return mix(uvMod2, (uvBound * 2.0) - uvMod2, step(uvBound, uvMod2));
-}
-
-vec4 wrappedMirrorSample(const sampler2D tex, vec2 uv, const vec2 mask, const vec2 highMinusLow, const vec2 isClamp, const vec2 isMirror)
-{
-  const ivec2 texSize = textureSize(tex, 0);
-
-  // first apply clamping if enabled (clamp S/T, low S/T -> high S/T)
-  const vec2 uvClamp = clamp(uv, vec2(0.0), highMinusLow);
-  uv = mix(uv, uvClamp, isClamp);
-
-  // then mirror the result if needed (mirror S/T)
-  const vec2 uvMirror = mirrorUV(uv, mask - 0.5);
-  uv = mix(uv, uvMirror, isMirror);
-
-  // clamp again (mask S/T), this is also done to avoid OOB texture access
-  uv = mod(uv, min(texSize, mask));
-
-  return texelFetch(tex, ivec2(floor(uv)), 0);
-}