Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def get_scene_render_state(scene: bpy.types.Scene):
convert=quantize_tuple(f64render_rs.default_convert, 9.0, -1.0, 1.0),
cc=SOLID_CC,
tex_confs=([get_tile_conf(getattr(f64render_rs, f"default_tex{i}")) for i in range(0, 8)]),
tex_size=(32, 32),
)
state.lights[0] = F64Light(
quantize_srgb(fast64_rs.light0Color, force_alpha=True), quantize_direction(fast64_rs.light0Direction)
Expand Down
2 changes: 1 addition & 1 deletion material/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def f64_material_parse(f3d_mat: "F3DMaterialProperty", always_set: bool, set_lig
state.cc = get_cc_settings(f3d_mat)
if always_set or (f3d_mat.set_prim and cc_uses["Primitive"]):
state.prim_color = quantize_srgb(f3d_mat.prim_color)
state.prim_lod = (f3d_mat.prim_lod_frac, f3d_mat.prim_lod_min)
state.prim_lod = quantize_tuple((f3d_mat.prim_lod_frac, f3d_mat.prim_lod_min), 8)
if always_set or (f3d_mat.set_env and cc_uses["Environment"]):
state.env_color = quantize_srgb(f3d_mat.env_color)
if always_set or (f3d_mat.set_key and cc_uses["Key"]): # extra 0 for alignment
Expand Down
39 changes: 20 additions & 19 deletions renderer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from io import StringIO
import math
import pathlib
import time
Expand Down Expand Up @@ -97,24 +98,24 @@ def init_shader(self, scene: bpy.types.Scene):
print("Compiling shader")

shaderPath = (pathlib.Path(__file__).parent / "shader").resolve()
shaderVert = ""
shaderFrag = ""

with open(shaderPath / "utils.glsl", "r", encoding="utf-8") as f:
shaderUtils = f.read()
shaderVert += shaderUtils
shaderFrag += shaderUtils

with open(shaderPath / "defines.glsl", "r", encoding="utf-8") as f:
shaderDef = f.read()
shaderVert += shaderDef
shaderFrag += shaderDef

with open(shaderPath / "main3d.vert.glsl", "r", encoding="utf-8") as f:
shaderVert += f.read()
shaderVert = StringIO()
shaderFrag = StringIO()

general_shaders = ("utils.glsl", "defines.glsl")
vertex_shaders = ("main3d.vert.glsl",)
frag_shaders = (
"textures.glsl",
"main3d.frag.glsl",
)

with open(shaderPath / "main3d.frag.glsl", "r", encoding="utf-8") as f:
shaderFrag += f.read()
for shader in general_shaders + vertex_shaders:
with open(shaderPath / shader, "r", encoding="utf-8") as f:
shaderVert.write(f.read())
shaderVert.write("\n")
for shader in general_shaders + frag_shaders:
with open(shaderPath / shader, "r", encoding="utf-8") as f:
shaderFrag.write(f.read())
shaderFrag.write("\n")

shader_info = gpu.types.GPUShaderCreateInfo()

Expand Down Expand Up @@ -158,8 +159,8 @@ def init_shader(self, scene: bpy.types.Scene):
else:
shader_info.fragment_out(0, "VEC4", "FragColor")

shader_info.vertex_source(shaderVert)
shader_info.fragment_source(shaderFrag)
shader_info.vertex_source(shaderVert.getvalue())
shader_info.fragment_source(shaderFrag.getvalue())

self.shader = gpu.shader.create_from_info(shader_info)
self.shader_fallback = gpu.shader.from_builtin(
Expand Down
131 changes: 10 additions & 121 deletions shader/main3d.frag.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -3,129 +3,9 @@
#extension GL_ARB_fragment_shader_interlock : enable
layout(pixel_interlock_unordered) in;
#endif
#ifdef GL_ARB_derivative_control
#extension GL_ARB_derivative_control : enable
#endif

#define DECAL_DEPTH_DELTA 100

// Snaps RGB to the nearest multiple of 1/8 and thresholds alpha to 0.0 or 1.0
// (alpha >= 0.5 becomes 1.0).
vec4 quantize3Bit(in vec4 color) {
    const float steps = 8.0;
    vec3 rgb = round(color.rgb * steps) / steps;
    float alpha = step(0.5, color.a);
    return vec4(rgb, alpha);
}

// Snaps all four channels (alpha included) to the nearest multiple of 1/16.
vec4 quantize4Bit(in vec4 color) {
    // Author's note: a divisor of 16 seems more accurate than 15.
    const float steps = 16.0;
    vec4 scaled = color * steps;
    return round(scaled) / steps;
}

// Applies the per-tile colour post-processing selected by `flags`, in order:
// 4-bit quantization, 3-bit quantization, linear->gamma conversion of RGB,
// and finally a mono swizzle that replicates red into every channel.
// NOTE(review): flagSelect is defined elsewhere; presumably it yields its last
// argument when the flag is set and the third otherwise -- confirm.
vec4 quantizeTexture(uint flags, vec4 color) {
    const vec4 fourBit = quantize4Bit(color);
    vec4 texel = flagSelect(flags, TEX_FLAG_4BIT, color, fourBit);

    const vec4 threeBit = quantize3Bit(texel);
    texel = flagSelect(flags, TEX_FLAG_3BIT, texel, threeBit);

    // Gamma conversion always runs and leaves alpha untouched.
    texel.rgb = linearToGamma(texel.rgb);

    const vec4 mono = texel.rrrr;
    return flagSelect(flags, TEX_FLAG_MONO, texel.rgba, mono);
}

// Samples `tex` at `uvCoord` using the tile configuration and the requested
// texture filter, then runs the result through quantizeTexture().
//
//   tex       - texture to fetch texels from
//   tileConf  - per-tile shift / low / high / mask / flags
//   uvCoord   - texture coordinate; scaled by tileConf.shift and offset by
//               tileConf.low before sampling
//   texFilter - G_TF_POINT fetches one texel; any other value fetches the 2x2
//               neighbourhood and filters it
//
// Based on:
// https://github.com/rt64/rt64/blob/61aa08f517cd16c1dbee4e097768b08e2a060307/src/shaders/TextureSampler.hlsli#L156-L276
vec4 sampleSampler(in const sampler2D tex, in const TileConf tileConf, in vec2 uvCoord, in const uint texFilter) {
    uvCoord *= tileConf.shift;

#ifdef SIMULATE_LOW_PRECISION
    // Simulates the lower precision of the hardware's coordinate interpolation.
    uvCoord = round(uvCoord * LOW_PRECISION) / LOW_PRECISION;
#endif

    uvCoord -= tileConf.low;

    // The sign of mask/high carries the clamp/mirror switches:
    // isClamp is 1.0 where mask <= 1.0, isMirror is 1.0 where high <= 0.0.
    const vec2 isClamp = step(tileConf.mask, vec2(1.0));  // if mask is negated, clamp
    const vec2 isMirror = step(tileConf.high, vec2(0.0)); // if high is negated, mirror
    const vec2 mask = abs(tileConf.mask);
    const vec2 highMinusLow = abs(tileConf.high) - abs(tileConf.low);

    if (texFilter == G_TF_POINT) {
        // wrappedMirrorSample takes a vec2, so pass the floored coordinate
        // directly instead of round-tripping through ivec2.
        return quantizeTexture(tileConf.flags, wrappedMirrorSample(tex, floor(uvCoord), mask, highMinusLow, isClamp, isMirror));
    }

    // Filtered path: shift by half a (scaled) texel so the 2x2 footprint is
    // centred on the sample position.
    uvCoord -= 0.5 * tileConf.shift;
    const vec2 texelBaseInt = floor(uvCoord);
    const vec4 sample00 = wrappedMirrorSample(tex, texelBaseInt, mask, highMinusLow, isClamp, isMirror);
    const vec4 sample01 = wrappedMirrorSample(tex, texelBaseInt + vec2(0, 1), mask, highMinusLow, isClamp, isMirror);
    const vec4 sample10 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 0), mask, highMinusLow, isClamp, isMirror);
    const vec4 sample11 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 1), mask, highMinusLow, isClamp, isMirror);
    const vec2 fracPart = uvCoord - texelBaseInt;

#ifdef USE_LINEAR_FILTER
    // Plain four-tap bilinear interpolation.
    return quantizeTexture(tileConf.flags, mix(mix(sample00, sample10, fracPart.x), mix(sample01, sample11, fracPart.x), fracPart.y));
#else
    // 1.0 (not 1) keeps this a float division even if LOW_PRECISION is
    // defined as an integer literal, where 1 / LOW_PRECISION would be 0.
    if (texFilter == G_TF_AVERAGE && all(lessThanEqual(vec2(1.0 / LOW_PRECISION), abs(fracPart - 0.5)))) {
        // Box average of the four texels.
        return quantizeTexture(tileConf.flags, (sample00 + sample01 + sample10 + sample11) / 4.0);
    }

    // Three-sample (triangle) filter.
    // Originally written by ArthurCarvalho
    // Sourced from https://www.emutalk.net/threads/emulating-nintendo-64-3-sample-bilinear-filtering-using-shaders.54215/
    vec4 tri0 = mix(sample00, sample10, fracPart.x) + (sample01 - sample00) * fracPart.y;
    vec4 tri1 = mix(sample11, sample01, 1.0 - fracPart.x) + (sample10 - sample11) * (1.0 - fracPart.y);
    // Pick the triangle that contains the sample point.
    return quantizeTexture(tileConf.flags, mix(tri0, tri1, step(1.0, fracPart.x + fracPart.y)));
#endif
}

// Samples the texture bound at slot `textureIndex` using that slot's tile
// configuration. Each sampler is selected through an explicit switch case;
// any index without its own case (0 included) falls back to tex0.
// NOTE(review): material.texConfs is indexed before the switch, so an index
// past the end of that array is not guarded here -- confirm callers keep it in range.
vec4 sampleIndex(in const uint textureIndex, in const vec2 uvCoord, in const uint texFilter) {
    TileConf conf = material.texConfs[textureIndex];
    switch (textureIndex) {
        case 1u: return sampleSampler(tex1, conf, uvCoord, texFilter);
        case 2u: return sampleSampler(tex2, conf, uvCoord, texFilter);
        case 3u: return sampleSampler(tex3, conf, uvCoord, texFilter);
        case 4u: return sampleSampler(tex4, conf, uvCoord, texFilter);
        case 5u: return sampleSampler(tex5, conf, uvCoord, texFilter);
        case 6u: return sampleSampler(tex6, conf, uvCoord, texFilter);
        case 7u: return sampleSampler(tex7, conf, uvCoord, texFilter);
        default: break;
    }
    return sampleSampler(tex0, conf, uvCoord, texFilter);
}

// Computes the LOD blend fraction (return value) and writes the pair of tile
// indices to sample into tileIndex0 / tileIndex1. Reads textLOD(),
// textDetail(), inputUV, material.primLod.y and material.mipCount.
// When textLOD() is G_TL_TILE the indices are left untouched and 1.0 is returned.
float computeLOD(inout uint tileIndex0, inout uint tileIndex1) {
// https://github.com/rt64/rt64/blob/0ca92eeb6c2f58ce3581c65f87f7261b8ac0fea0/src/shaders/TextureSampler.hlsli#L18
if (textLOD() == G_TL_TILE)
return 1.0f;
const uint texDetail = textDetail();
const bool lodSharpen = texDetail == G_TD_SHARPEN;
const bool lodDetail = texDetail == G_TD_DETAIL;
const bool lodSharpDetail = lodSharpen || lodDetail;

// Screen-space rate of change of the UVs; the coarse variants are used when
// GL_ARB_derivative_control is available.
// NOTE(review): only d(u)/dx and d(v)/dy are considered; the cross terms
// d(u)/dy and d(v)/dx are ignored.
#ifdef GL_ARB_derivative_control
const vec2 dfd = abs(vec2(dFdxCoarse(inputUV.x), dFdyCoarse(inputUV.y)));
#else
const vec2 dfd = abs(vec2(dFdx(inputUV.x), dFdy(inputUV.y)));
#endif
float maxDst = max(dfd.x, dfd.y);

// Sharpen/detail modes never go below the material's minimum LOD.
if (lodSharpDetail)
maxDst = max(maxDst, material.primLod.y);

// tileBase is negative while maxDst < 1.0; the power-of-two divisor below
// is clamped to 2^0 in that case.
// NOTE(review): log2(0.0) is -inf when the UVs are constant across the
// pixel quad; the int conversion of that value is not guarded.
int tileBase = int(floor(log2(maxDst)));
float lodFraction = maxDst / pow(2, max(tileBase, 0)) - 1.0;

if (lodSharpen && maxDst < 1.0)
lodFraction = maxDst - 1.0;

if (lodDetail) {
if (lodFraction < 0.0)
lodFraction = maxDst;
// Detail mode shifts the selected tile pair up by one.
tileBase += 1;
} else if (tileBase >= material.mipCount)
// Past the last mip level: fraction is pinned at 1.0.
lodFraction = 1.0;

// Sharpen/detail keep a possibly negative fraction but clamp the tile;
// every other mode clamps the fraction to be non-negative instead.
if (lodSharpDetail)
tileBase = max(tileBase, 0);
else
lodFraction = max(lodFraction, 0.0);

// Both indices are limited to [0, material.mipCount].
tileIndex0 = clamp(tileBase, 0, material.mipCount);
tileIndex1 = clamp(tileBase + 1, 0, material.mipCount);
return lodFraction;
}

vec3 cc_fetchColor(in int val, in vec4 shade, in vec4 comb, in float lodFraction, in vec4 texData0, in vec4 texData1)
{
if(val == CC_C_COMB ) return comb.rgb;
Expand Down Expand Up @@ -264,9 +144,18 @@ void main()

vec4 ccShade = geoModeSelect(G_SHADE_SMOOTH, cc_shade_flat, cc_shade);

#ifdef GL_ARB_derivative_control
const vec2 dx = abs(vec2(dFdxCoarse(inputUV.x), dFdyCoarse(inputUV.x)));
const vec2 dy = abs(vec2(dFdxCoarse(inputUV.y), dFdyCoarse(inputUV.y)));
#else
const vec2 dx = abs(vec2(dFdx(inputUV.x), dFdy(inputUV.x)));
const vec2 dy = abs(vec2(dFdx(inputUV.y), dFdy(inputUV.y)));
#endif

uint tex0Index = 0;
uint tex1Index = 1;
const float lodFraction = computeLOD(tex0Index, tex1Index);
float lodFraction = 0.0;
computeLOD(tex0Index, tex1Index, textLOD(), textDetail(), material.primLod.y, dx, dy, false, lodFraction);

vec4 texData0 = sampleIndex(tex0Index, inputUV, texFilter);
vec4 texData1 = sampleIndex(tex1Index, inputUV, texFilter);
Expand Down
150 changes: 150 additions & 0 deletions shader/textures.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#ifdef GL_ARB_derivative_control
#extension GL_ARB_derivative_control : enable
#endif

// Snaps RGB to the nearest multiple of 1/8 and thresholds alpha to 0.0 or 1.0
// (alpha >= 0.5 becomes 1.0).
vec4 quantize3Bit(in vec4 color) {
    const float steps = 8.0;
    vec3 rgb = round(color.rgb * steps) / steps;
    float alpha = step(0.5, color.a);
    return vec4(rgb, alpha);
}

// Snaps all four channels (alpha included) to the nearest multiple of 1/16.
vec4 quantize4Bit(in vec4 color) {
    // Author's note: a divisor of 16 seems more accurate than 15.
    const float steps = 16.0;
    vec4 scaled = color * steps;
    return round(scaled) / steps;
}

// Applies the per-tile colour post-processing selected by `flags`, in order:
// 4-bit quantization, 3-bit quantization, linear->gamma conversion of RGB,
// and finally a mono swizzle that replicates red into every channel.
// NOTE(review): flagSelect is defined elsewhere; presumably it yields its last
// argument when the flag is set and the third otherwise -- confirm.
vec4 quantizeTexture(uint flags, vec4 color) {
    const vec4 fourBit = quantize4Bit(color);
    vec4 texel = flagSelect(flags, TEX_FLAG_4BIT, color, fourBit);

    const vec4 threeBit = quantize3Bit(texel);
    texel = flagSelect(flags, TEX_FLAG_3BIT, texel, threeBit);

    // Gamma conversion always runs and leaves alpha untouched.
    texel.rgb = linearToGamma(texel.rgb);

    const vec4 mono = texel.rrrr;
    return flagSelect(flags, TEX_FLAG_MONO, texel.rgba, mono);
}

// Folds `uvIn` back and forth across `uvBound`: the coordinate is wrapped
// with period (2 * uvBound + 1), and any component that lands at or beyond
// uvBound is reflected to (2 * uvBound - wrapped).
vec2 mirrorUV(const vec2 uvIn, const vec2 uvBound)
{
    const vec2 doubled = uvBound * 2.0;
    const vec2 wrapped = mod(uvIn, doubled + 1.0);
    const vec2 reflected = doubled - wrapped;
    // 1.0 per component once the wrapped value has reached the bound.
    const vec2 pastBound = step(uvBound, wrapped);
    return mix(wrapped, reflected, pastBound);
}

// Fetches a single texel from mip 0 of `tex` after applying the tile's
// clamp / mirror / wrap addressing to `uv` (given in texel units).
//
//   mask         - wrap size per axis
//   highMinusLow - upper clamp bound per axis
//   isClamp      - 1.0 on axes where clamping is enabled, else 0.0
//   isMirror     - 1.0 on axes where mirroring is enabled, else 0.0
vec4 wrappedMirrorSample(const sampler2D tex, vec2 uv, const vec2 mask, const vec2 highMinusLow, const vec2 isClamp, const vec2 isMirror)
{
    // Step 1: clamp S/T into [0, high - low] on the axes that request it.
    uv = mix(uv, clamp(uv, vec2(0.0), highMinusLow), isClamp);

    // Step 2: mirror S/T on the axes that request it.
    uv = mix(uv, mirrorUV(uv, mask - 0.5), isMirror);

    // Step 3: wrap into the mask S/T size, but never beyond the real texture
    // size, which also prevents out-of-bounds texel fetches.
    const vec2 wrapPeriod = min(vec2(textureSize(tex, 0)), mask);
    uv = mod(uv, wrapPeriod);

    return texelFetch(tex, ivec2(floor(uv)), 0);
}

// Samples `tex` at `uvCoord` using the tile configuration and the requested
// texture filter, then runs the result through quantizeTexture().
//
//   tex       - texture to fetch texels from
//   tileConf  - per-tile shift / low / high / mask / flags
//   uvCoord   - texture coordinate; scaled by tileConf.shift and offset by
//               tileConf.low before sampling
//   texFilter - G_TF_POINT fetches one texel; any other value fetches the 2x2
//               neighbourhood and filters it
//
// Based on:
// https://github.com/rt64/rt64/blob/61aa08f517cd16c1dbee4e097768b08e2a060307/src/shaders/TextureSampler.hlsli#L156-L276
vec4 sampleSampler(in const sampler2D tex, in const TileConf tileConf, in vec2 uvCoord, in const uint texFilter) {
    uvCoord *= tileConf.shift;

#ifdef SIMULATE_LOW_PRECISION
    // Simulates the lower precision of the hardware's coordinate interpolation.
    uvCoord = round(uvCoord * LOW_PRECISION) / LOW_PRECISION;
#endif

    uvCoord -= tileConf.low;

    // The sign of mask/high carries the clamp/mirror switches:
    // isClamp is 1.0 where mask <= 1.0, isMirror is 1.0 where high <= 0.0.
    const vec2 isClamp = step(tileConf.mask, vec2(1.0));  // if mask is negated, clamp
    const vec2 isMirror = step(tileConf.high, vec2(0.0)); // if high is negated, mirror
    const vec2 mask = abs(tileConf.mask);
    const vec2 highMinusLow = abs(tileConf.high) - abs(tileConf.low);

    if (texFilter == G_TF_POINT) {
        // wrappedMirrorSample takes a vec2, so pass the floored coordinate
        // directly instead of round-tripping through ivec2.
        return quantizeTexture(tileConf.flags, wrappedMirrorSample(tex, floor(uvCoord), mask, highMinusLow, isClamp, isMirror));
    }

    // Filtered path: shift by half a (scaled) texel so the 2x2 footprint is
    // centred on the sample position.
    uvCoord -= 0.5 * tileConf.shift;
    const vec2 texelBaseInt = floor(uvCoord);
    const vec4 sample00 = wrappedMirrorSample(tex, texelBaseInt, mask, highMinusLow, isClamp, isMirror);
    const vec4 sample01 = wrappedMirrorSample(tex, texelBaseInt + vec2(0, 1), mask, highMinusLow, isClamp, isMirror);
    const vec4 sample10 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 0), mask, highMinusLow, isClamp, isMirror);
    const vec4 sample11 = wrappedMirrorSample(tex, texelBaseInt + vec2(1, 1), mask, highMinusLow, isClamp, isMirror);
    const vec2 fracPart = uvCoord - texelBaseInt;

#ifdef USE_LINEAR_FILTER
    // Plain four-tap bilinear interpolation.
    return quantizeTexture(tileConf.flags, mix(mix(sample00, sample10, fracPart.x), mix(sample01, sample11, fracPart.x), fracPart.y));
#else
    // 1.0 (not 1) keeps this a float division even if LOW_PRECISION is
    // defined as an integer literal, where 1 / LOW_PRECISION would be 0.
    if (texFilter == G_TF_AVERAGE && all(lessThanEqual(vec2(1.0 / LOW_PRECISION), abs(fracPart - 0.5)))) {
        // Box average of the four texels.
        return quantizeTexture(tileConf.flags, (sample00 + sample01 + sample10 + sample11) / 4.0);
    }

    // Three-sample (triangle) filter.
    // Originally written by ArthurCarvalho
    // Sourced from https://www.emutalk.net/threads/emulating-nintendo-64-3-sample-bilinear-filtering-using-shaders.54215/
    vec4 tri0 = mix(sample00, sample10, fracPart.x) + (sample01 - sample00) * fracPart.y;
    vec4 tri1 = mix(sample11, sample01, 1.0 - fracPart.x) + (sample10 - sample11) * (1.0 - fracPart.y);
    // Pick the triangle that contains the sample point.
    return quantizeTexture(tileConf.flags, mix(tri0, tri1, step(1.0, fracPart.x + fracPart.y)));
#endif
}

// Samples the texture bound at slot `textureIndex` using that slot's tile
// configuration. Each sampler is selected through an explicit switch case;
// any index without its own case (0 included) falls back to tex0.
// NOTE(review): material.texConfs is indexed before the switch, so an index
// past the end of that array is not guarded here -- confirm callers keep it in range.
vec4 sampleIndex(in const uint textureIndex, in const vec2 uvCoord, in const uint texFilter) {
    TileConf conf = material.texConfs[textureIndex];
    switch (textureIndex) {
        case 1u: return sampleSampler(tex1, conf, uvCoord, texFilter);
        case 2u: return sampleSampler(tex2, conf, uvCoord, texFilter);
        case 3u: return sampleSampler(tex3, conf, uvCoord, texFilter);
        case 4u: return sampleSampler(tex4, conf, uvCoord, texFilter);
        case 5u: return sampleSampler(tex5, conf, uvCoord, texFilter);
        case 6u: return sampleSampler(tex6, conf, uvCoord, texFilter);
        case 7u: return sampleSampler(tex7, conf, uvCoord, texFilter);
        default: break;
    }
    return sampleSampler(tex0, conf, uvCoord, texFilter);
}

// Computes the LOD fraction and, when tile LOD is enabled, advances the two
// tile indices to the mip pair that should be blended.
//
//   tileIndex0, tileIndex1 - in: base tile indices; out: tiles to sample.
//                            Only modified when `textLOD` is true; results
//                            are masked to the range 0..7.
//   textLOD                - true when per-pixel tile selection is enabled
//   textDetail             - one of G_TD_CLAMP / G_TD_SHARPEN / G_TD_DETAIL
//   minLod                 - lower bound applied to the LOD fraction
//   dx, dy                 - absolute screen-space derivatives of U and V
//   perspectiveOverflow    - forces the "distant" path regardless of distance
//   lodFrac                - out: resulting LOD fraction (always written)
void computeLOD(
    inout uint tileIndex0,
    inout uint tileIndex1,
    const bool textLOD,
    const uint textDetail,
    const float minLod,
    const vec2 dx,
    const vec2 dy,
    const bool perspectiveOverflow, // this should be possible from what I've read in parallel-rdp, can always be removed
    out float lodFrac
) {
    const bool sharpen = textDetail == G_TD_SHARPEN;
    const bool detail = textDetail == G_TD_DETAIL;
    const bool clampDetail = textDetail == G_TD_CLAMP;

    const vec2 dfd = max(dx, dy);
    // TODO: should this value be scaled by clipping planes?
    const float maxDist = max(dfd.x, dfd.y);

    // Clamp the distance to >= 1.0 before log2 so the result is never
    // negative (or -inf for maxDist == 0). Converting a negative float to
    // uint is undefined in GLSL and, on drivers that wrap, would make every
    // magnified pixel look like it is past the last mip level.
    const uint mipBase = uint(floor(log2(max(maxDist, 1.0))));
    const bool distant = perspectiveOverflow || maxDist >= 16384.0;
    const bool aboveCount = mipBase >= material.mipCount;
    const bool maxDistant = distant || aboveCount;
    const bool magnify = maxDist < 1.0;

    // Fraction used while magnifying (maxDist < 1.0).
    const float detailFrac = max(minLod, maxDist) - float(sharpen);
    const float magnifiedFrac = mix(float(maxDistant), detailFrac, float(!clampDetail));

    // Fraction used otherwise: position of maxDist inside its power-of-two
    // band, unless clamp mode has run past the available levels.
    const float distantFrac = float(distant || (aboveCount && clampDetail));
    const float notClampedFrac = max(maxDist / pow(2.0, float(mipBase)) - 1.0, minLod);
    const float notMagnifiedFrac = mix(distantFrac, notClampedFrac, !maxDistant || !clampDetail);

    lodFrac = mix(notMagnifiedFrac, magnifiedFrac, float(!distant && magnify));

    if (textLOD) {
        // maxDistant is false on the right-hand side, so the offset there is
        // simply mipBase.
        const uint tileOffset = maxDistant ? material.mipCount : mipBase;
        tileIndex0 = tileIndex0 + tileOffset;
        tileIndex1 = tileIndex0;
        if (detail) {
            tileIndex1 += uint(!(maxDistant || magnify)) + 1u;
            tileIndex0 += uint(!magnify);
        } else {
            tileIndex1 += uint(!maxDistant && (sharpen || !magnify));
        }
        // Keep both indices within 0..7.
        tileIndex0 &= 7u;
        tileIndex1 &= 7u;
    }
}
Loading