diff --git a/source/gfx/prim.h b/source/gfx/prim.h index 528522105..44b9db5b8 100644 --- a/source/gfx/prim.h +++ b/source/gfx/prim.h @@ -5,10 +5,9 @@ #ifndef __PRIM_HEADER__ #define __PRIM_HEADER__ -#ifndef _GLOBAL_HEADER_ +#ifndef _GLOBAL_HEADER_ #include "system\global.h" #endif -#include "gfx\gpugte.h" #include "gfx\tpage.h" #define MAX_OT (2048) @@ -16,6 +15,48 @@ #define USE_NTAGS 1 + +/************************************************************************************/ +#define GPU_PolyF3Tag (4) +#define GPU_PolyF3Code (0x20) +#define GPU_PolyF4Tag (5) +#define GPU_PolyF4Code (0x28) +#define GPU_PolyFT3Tag (7) +#define GPU_PolyFT3Code (0x24) +#define GPU_PolyFT4Tag (9) +#define GPU_PolyFT4Code (0x2c) +#define GPU_PolyG4Tag (8) +#define GPU_PolyG4Code (0x38) +#define GPU_PolyGT3Tag (9) +#define GPU_PolyGT3Code (0x34) +#define GPU_PolyGT4Tag (12) +#define GPU_PolyGT4Code (0x3c) + +#define GPUCode_ShadeTex (1<<0) // Setting this **DISABLES** texture shading. +#define GPUCode_SemiTrans (1<<1) // Setting this enables semi-transparent mode +#define GPUCode_Textured (1<<2) +#define GPUCode_Quad (1<<3) +#define GPUCode_Gouraud (1<<4) +#define GPUCode_Global (1<<5) + +/******************************************************************************/ +#define GetPrimSpace(primtype,Ptr) ((primtype *)Ptr); Ptr+=sizeof(primtype); + +#define setSemiTransPolyF3(p) setlen(p, 4), setcode(p, 0x20|GPUCode_SemiTrans) +#define setSemiTransPolyFT3(p) setlen(p, 7), setcode(p, 0x24|GPUCode_SemiTrans) +#define setSemiTransPolyG3(p) setlen(p, 6), setcode(p, 0x30|GPUCode_SemiTrans) +#define setSemiTransPolyGT3(p) setlen(p, 9), setcode(p, 0x34|GPUCode_SemiTrans) +#define setSemiTransPolyF4(p) setlen(p, 5), setcode(p, 0x28|GPUCode_SemiTrans) +#define setSemiTransPolyFT4(p) setlen(p, 9), setcode(p, 0x2c|GPUCode_SemiTrans) +#define setSemiTransPolyG4(p) setlen(p, 8), setcode(p, 0x38|GPUCode_SemiTrans) +#define setSemiTransPolyGT4(p) setlen(p, 12), setcode(p, 0x3c|GPUCode_SemiTrans) + 
+#define setShadeTexPolyFT3(p) setlen(p, 7), setcode(p, 0x24|GPUCode_ShadeTex) +#define setShadeTexPolyFT4(p) setlen(p, 9), setcode(p, 0x2c|GPUCode_ShadeTex) + +#define setSemiTransShadeTexPolyFT3(p) setlen(p, 7), setcode(p, 0x24|GPUCode_SemiTrans|GPUCode_ShadeTex) +#define setSemiTransShadeTexPolyFT4(p) setlen(p, 9), setcode(p, 0x2c|GPUCode_SemiTrans|GPUCode_ShadeTex) + /*** Fast Replacements *********************************************************************************/ #undef setaddr #undef getaddr diff --git a/source/utils/gpu.inc b/source/utils/gpu.inc new file mode 100644 index 000000000..ce7ca2976 --- /dev/null +++ b/source/utils/gpu.inc @@ -0,0 +1,496 @@ +;******************* +;*** GPU Defines *** +;******************* + +;****************************************************************************** +; SVECTOR +; ------- +; short vx,vy,vz,vw + + rsreset +SVECTOR_vx rh 1 +SVECTOR_vy rh 1 +SVECTOR_vz rh 1 +SVECTOR_vw rh 1 +SVECTOR_size rb 0 + +;****************************************************************************** +; VECTOR +; ------- +; long vx,vy,vz,vw + + rsreset +VECTOR_vx rw 1 +VECTOR_vy rw 1 +VECTOR_vz rw 1 +VECTOR_vw rw 1 +VECTOR_size rb 0 + +;****************************************************************************** +; POLY_F3 +; ------- +; u_long tag; +; u_char r0, g0, b0, code; +; short x0, y0; +; short x1, y1; +; short x2, y2; + + rsreset +F3_tag rb 3 +F3_len rb 1 + +F3_rgb0 rb 0 +F3_r0 rb 1 +F3_g0 rb 1 +F3_b0 rb 1 +F3_code rb 1 + +F3_xy0 rh 0 +F3_x0 rh 1 +F3_y0 rh 1 + +F3_xy1 rh 0 +F3_x1 rh 1 +F3_y1 rh 1 + +F3_xy2 rh 0 +F3_x2 rh 1 +F3_y2 rh 1 + +F3_size rb 0 +F3_PrimSize equ 4 +F3_PrimCode equ $20 + +;------------------------------------------------------------------------------ +; POLY_FT3 +; -------- +; u_long tag; +; u_char r0, g0, b0, code; +; short x0, y0; +; u_char u0, v0; +; u_short clut; +; short x1, y1; +; u_char u1, v1; +; u_short tpage; +; short x2, y2; +; u_char u2, v2; +; u_short pad1; + + rsreset +FT3_tag rb 3 
+FT3_len rb 1 + +FT3_rgb0 rb 0 +FT3_r0 rb 1 +FT3_g0 rb 1 +FT3_b0 rb 1 +FT3_code rb 1 + +FT3_xy0 rb 0 +FT3_x0 rh 1 +FT3_y0 rh 1 + +FT3_uv0 rb 0 +FT3_u0 rb 1 +FT3_v0 rb 1 +FT3_clut rh 1 + +FT3_xy1 rb 0 +FT3_x1 rh 1 +FT3_y1 rh 1 + +FT3_uv1 rb 0 +FT3_u1 rb 1 +FT3_v1 rb 1 +FT3_tpage rh 1 + +FT3_xy2 rb 0 +FT3_x2 rh 1 +FT3_y2 rh 1 + +FT3_uv2 rb 0 +FT3_u2 rb 1 +FT3_v2 rb 1 +FT3_pad1 rh 1 + +FT3_size rb 0 +FT3_PrimSize equ 7 +FT3_PrimCode equ $24 + +;****************************************************************************** +; POLY_G3 +; ------- +; u_long tag; +; u_char r0, g0, b0, code; +; short x0, y0; +; u_char r1, g1, b1, pad1; +; short x1, y1; +; u_char r2, g2, b2, pad2; +; short x2, y2; + + rsreset +G3_tag rb 3 +G3_len rb 1 + +G3_rgb0 rb 0 +G3_r0 rb 1 +G3_g0 rb 1 +G3_b0 rb 1 +G3_code rb 1 + +G3_xy0 rh 0 +G3_x0 rh 1 +G3_y0 rh 1 + +G3_rgb1 rb 0 +G3_r1 rb 1 +G3_g1 rb 1 +G3_b1 rb 1 +G3_p1 rb 1 + +G3_xy1 rh 0 +G3_x1 rh 1 +G3_y1 rh 1 + +G3_rgb2 rb 0 +G3_r2 rb 1 +G3_g2 rb 1 +G3_b2 rb 1 +G3_p2 rb 1 + +G3_xy2 rh 0 +G3_x2 rh 1 +G3_y2 rh 1 + +G3_size rb 0 +G3_PrimSize equ 6 +G3_PrimCode equ $30 + +;------------------------------------------------------------------------------ +; POLY_GT3 +; -------- +; u_long tag; +; u_char r0, g0, b0, code; +; short x0, y0; +; u_char u0, v0; +; u_short clut; +; u_char r1, g1, b1, p1; +; short x1, y1; +; u_char u1, v1; +; u_short tpage; +; u_char r2, g2, b2, p2; +; short x2, y2; +; u_char u2, v2; +; u_short pad2; + + rsreset +GT3_tag rb 3 +GT3_len rb 1 + +GT3_rgb0 rb 0 +GT3_r0 rb 1 +GT3_g0 rb 1 +GT3_b0 rb 1 +GT3_code rb 1 + +GT3_xy0 rb 0 +GT3_x0 rh 1 +GT3_y0 rh 1 + +GT3_uv0 rb 0 +GT3_u0 rb 1 +GT3_v0 rb 1 +GT3_clut rh 1 + +GT3_rgb1 rb 0 +GT3_r1 rb 1 +GT3_g1 rb 1 +GT3_b1 rb 1 +GT3_p1 rb 1 + +GT3_xy1 rb 0 +GT3_x1 rh 1 +GT3_y1 rh 1 + +GT3_uv1 rb 0 +GT3_u1 rb 1 +GT3_v1 rb 1 +GT3_tpage rh 1 + +GT3_rgb2 rb 0 +GT3_r2 rb 1 +GT3_g2 rb 1 +GT3_b2 rb 1 +GT3_p2 rb 1 + +GT3_xy2 rb 0 +GT3_x2 rh 1 +GT3_y2 rh 1 + +GT3_uv2 rb 0 +GT3_u2 rb 1 +GT3_v2 rb 1 
+GT3_pad2 rh 1
+
+GT3_size rb 0
+GT3_PrimSize equ 9
+GT3_PrimCode equ $34
+
+;------------------------------------------------------------------------------
+; POLY_F4
+; -------
+; u_long tag;
+; u_char r0, g0, b0, code;
+; short x0, y0;
+; short x1, y1;
+; short x2, y2;
+; short x3, y3;
+
+ rsreset
+F4_tag rb 3
+F4_len rb 1
+
+F4_rgb0 rb 0
+F4_r0 rb 1
+F4_g0 rb 1
+F4_b0 rb 1
+F4_code rb 1
+
+F4_xy0 rh 0
+F4_x0 rh 1
+F4_y0 rh 1
+
+F4_xy1 rh 0
+F4_x1 rh 1
+F4_y1 rh 1
+
+F4_xy2 rh 0
+F4_x2 rh 1
+F4_y2 rh 1
+F4_xy3 rh 0 ; 4th vertex was missing - without it F4_size is 20, not sizeof(POLY_F4)=24
+F4_x3 rh 1
+F4_y3 rh 1
+
+F4_size rb 0
+F4_PrimSize equ 5
+F4_PrimCode equ $28
+
+;------------------------------------------------------------------------------
+; POLY_FT4
+; --------
+; u_long tag;
+; u_char r0, g0, b0, code;
+; short x0, y0;
+; u_char u0, v0;
+; u_short clut;
+; short x1, y1;
+; u_char u1, v1;
+; u_short tpage;
+; short x2, y2;
+; u_char u2, v2;
+; u_short pad2;
+; short x3, y3;
+; u_char u3, v3;
+; u_short pad3;
+
+ rsreset
+FT4_tag rb 3
+FT4_len rb 1
+
+FT4_rgb0 rb 0
+FT4_r0 rb 1
+FT4_g0 rb 1
+FT4_b0 rb 1
+FT4_code rb 1
+
+FT4_xy0 rb 0
+FT4_x0 rh 1
+FT4_y0 rh 1
+
+FT4_uv0 rb 0
+FT4_u0 rb 1
+FT4_v0 rb 1
+FT4_clut rh 1
+
+FT4_xy1 rb 0
+FT4_x1 rh 1
+FT4_y1 rh 1
+
+FT4_uv1 rb 0
+FT4_u1 rb 1
+FT4_v1 rb 1
+FT4_tpage rh 1
+
+FT4_xy2 rb 0
+FT4_x2 rh 1
+FT4_y2 rh 1
+
+FT4_uv2 rb 0
+FT4_u2 rb 1
+FT4_v2 rb 1
+FT4_pad2 rh 1
+
+FT4_xy3 rb 0
+FT4_x3 rh 1
+FT4_y3 rh 1
+
+FT4_uv3 rb 0
+FT4_u3 rb 1
+FT4_v3 rb 1
+FT4_pad3 rh 1
+
+FT4_size rb 0
+FT4_PrimSize equ 9
+FT4_PrimCode equ $2c
+
+;------------------------------------------------------------------------------
+; POLY_G4
+; -------
+; u_long tag;
+; u_char r0, g0, b0, code;
+; short x0, y0;
+; u_char r1, g1, b1, pad1;
+; short x1, y1;
+; u_char r2, g2, b2, pad2;
+; short x2, y2;
+; u_char r3, g3, b3, pad3;
+; short x3, y3;
+
+ rsreset
+G4_tag rb 3
+G4_len rb 1
+
+G4_rgb0 rb 0
+G4_r0 rb 1
+G4_g0 rb 1
+G4_b0 rb 1
+G4_code rb 1
+
+G4_xy0 rh 0
+G4_x0 rh 1
+G4_y0 rh 1
+
+G4_rgb1 rb 0
+G4_r1 rb 1
+G4_g1 rb 1
+G4_b1 rb 1
+G4_p1 rb 1
+
+G4_xy1 rh 0
+G4_x1 rh 1
+G4_y1 rh 1
+
+G4_rgb2 rb 0
+G4_r2 rb 1
+G4_g2 rb 1
+G4_b2 rb 1
+G4_p2 rb 1
+
+G4_xy2 rh 0
+G4_x2 rh 1
+G4_y2 rh 1
+
+G4_rgb3 rb 0
+G4_r3 rb 1
+G4_g3 rb 1
+G4_b3 rb 1
+G4_p3 rb 1
+
+G4_xy3 rh 0
+G4_x3 rh 1
+G4_y3 rh 1
+
+G4_size rb 0
+G4_PrimSize equ 8
+G4_PrimCode equ $38
+
+;------------------------------------------------------------------------------
+; POLY_GT4
+; --------
+; u_long tag;
+; u_char r0, g0, b0, code;
+; short x0, y0;
+; u_char u0, v0;
+; u_short clut;
+; u_char r1, g1, b1, p1;
+; short x1, y1;
+; u_char u1, v1;
+; u_short tpage;
+; u_char r2, g2, b2, p2;
+; short x2, y2;
+; u_char u2, v2;
+; u_short pad2;
+; u_char r3, g3, b3, p3;
+; short x3, y3;
+; u_char u3, v3;
+; u_short pad3;
+
+ rsreset
+GT4_tag rb 3
+GT4_len rb 1
+
+GT4_rgb0 rb 0
+GT4_r0 rb 1
+GT4_g0 rb 1
+GT4_b0 rb 1
+GT4_code rb 1
+
+GT4_xy0 rb 0
+GT4_x0 rh 1
+GT4_y0 rh 1
+
+GT4_uv0 rb 0
+GT4_u0 rb 1
+GT4_v0 rb 1
+GT4_clut rh 1
+
+GT4_rgb1 rb 0
+GT4_r1 rb 1
+GT4_g1 rb 1
+GT4_b1 rb 1
+GT4_p1 rb 1
+
+GT4_xy1 rb 0
+GT4_x1 rh 1
+GT4_y1 rh 1
+
+GT4_uv1 rb 0
+GT4_u1 rb 1
+GT4_v1 rb 1
+GT4_tpage rh 1
+
+GT4_rgb2 rb 0
+GT4_r2 rb 1
+GT4_g2 rb 1
+GT4_b2 rb 1
+GT4_p2 rb 1
+
+GT4_xy2 rb 0
+GT4_x2 rh 1
+GT4_y2 rh 1
+
+GT4_uv2 rb 0
+GT4_u2 rb 1
+GT4_v2 rb 1
+GT4_pad2 rh 1
+
+GT4_rgb3 rb 0
+GT4_r3 rb 1
+GT4_g3 rb 1
+GT4_b3 rb 1
+GT4_p3 rb 1
+
+GT4_xy3 rb 0
+GT4_x3 rh 1
+GT4_y3 rh 1
+
+GT4_uv3 rb 0
+GT4_u3 rb 1
+GT4_v3 rb 1
+GT4_pad3 rh 1
+
+GT4_size rb 0
+GT4_PrimSize equ 12
+GT4_PrimCode equ $3c
+
+; =========================================================================
+; end
diff --git a/source/utils/gtemisc.h b/source/utils/gtemisc.h
new file mode 100644
index 000000000..e7f9228ca
--- /dev/null
+++ b/source/utils/gtemisc.h
@@ -0,0 +1,739 @@
+/* gtemisc.h ZZKJ
+ Contains miscellaneous GTE defines.
+ Note that this also contains all sorts of C compiler assembler macros for
+ the CPU as well as for CPU access to the GTE.
+*/ + + +#ifndef __GTEMISC_H__ +#define __GTEMISC_H__ + +/* Convert an integer to a GTE fixed point value, and vice versa. + libgte.h contains the definition of ONE which is 4096=1<<12 +*/ +#define GTE_IntToFixed(i) ((i)<<12) +#define GTE_FixedToInt(f) ((f)>>12) + +/* The value for a full circle (360 degrees) for the GTE libraries */ +#define FULLCIRCLE ONE + + +/******************************************************************************/ + + +/* All the gte_ld* set macros take a long word VALUE as their parameter */ +/* All the gte_st* read macros take a POINTER TO a long word as their parameter */ +/* All the gte_get* read macros return a long word VALUE */ +/* Note that the compiler seems to know to insert a NOP delay after reading a GTE + register into a CPU register before using it. +*/ + +/* These have been made out of the ones in the Sony library file INLINE_C.H + with some new creation for the gte_get* versions ZZKJ +*/ + +/* Control registers */ + +/* Set the Light Source Direction Vector ("L" matrix) first line X (and Y) L11,L12 values */ +#define gte_ldL12L11( r0 ) __asm__ volatile ( \ + "ctc2 %0, $8" \ + : \ + : "r"( r0 ) ) + +/* Set the Light Source Direction Vector ("L" matrix) first line Z (and second line X) L13,L21 values */ +#define gte_ldL21L13( r0 ) __asm__ volatile ( \ + "ctc2 %0, $9" \ + : \ + : "r"( r0 ) ) + +/* Set the DQA register in the GTE */ +#define gte_lddqa( r0 ) __asm__ volatile ( \ + "ctc2 %0, $27" \ + : \ + : "r"( r0 ) ) + +/* Read the DQA register in the GTE */ +#define gte_stdqa( r0 ) __asm__ volatile ( \ + "cfc2 $12, $27;" \ + "nop;" \ + "sw $12, 0( %0 )" \ + : \ + : "r"( r0 ) \ + : "$12", "memory" ) + +/* Get the DQA register from the GTE */ +#define gte_getdqa( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "cfc2 %0, $27;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/* Set the DQB register in the GTE */ +#define gte_lddqb( r0 ) __asm__ volatile ( \ + "ctc2 %0, $28" \ + : \ + : "r"( r0 ) ) + +/* Read the DQB register in the 
GTE */ +#define gte_stdqb( r0 ) __asm__ volatile ( \ + "cfc2 $12, $28;" \ + "nop;" \ + "sw $12, 0( %0 )" \ + : \ + : "r"( r0 ) \ + : "$12", "memory" ) + +/* Get the DQB register from the GTE */ +#define gte_getdqb( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "cfc2 %0, $28;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/* Set the ZSF3 (ZAverage3 scaling factor) register in the GTE */ +#define gte_ldzsf3( r0 ) __asm__ volatile ( \ + "ctc2 %0, $29" \ + : \ + : "r"( r0 ) ) + +/* Read the ZSF3 (ZAverage3 scaling factor) register in the GTE */ +#define gte_stzsf3( r0 ) __asm__ volatile ( \ + "cfc2 $12, $29;" \ + "nop;" \ + "sw $12, 0( %0 )" \ + : \ + : "r"( r0 ) \ + : "$12", "memory" ) + +/* Set the ZSF4 (ZAverage4 scaling factor) register in the GTE */ +#define gte_ldzsf4( r0 ) __asm__ volatile ( \ + "ctc2 %0, $30" \ + : \ + : "r"( r0 ) ) + +/* Read the ZSF4 (ZAverage4 scaling factor) register in the GTE */ +#define gte_stzsf4( r0 ) __asm__ volatile ( \ + "cfc2 $12, $30;" \ + "nop;" \ + "sw $12, 0( %0 )" \ + : \ + : "r"( r0 ) \ + : "$12", "memory" ) + +/* Get the FLAGS register from the GTE */ +#define gte_getflg( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "cfc2 %0, $31;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/******************************************************************************/ + +/* Data registers */ + +/* Get the OTZ result register from the GTE */ +#define gte_getotz( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $7;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/* Get the SZx(0) Z result register from the GTE */ +#define gte_getszx( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $16;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/* Get the SZ0 Z result register from the GTE */ +#define gte_getsz0( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $17;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/* Set the SZ0 Z register in the GTE */ +#define gte_ldsz0( r0 ) __asm__ volatile ( \ + "mtc2 %0, $17" \ + : \ + : "r"( r0 ) ) + +/* 
Get the SZ1 Z result register from the GTE */ +#define gte_getsz1( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $18;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/* Set the SZ1 Z register in the GTE */ +#define gte_ldsz1( r0 ) __asm__ volatile ( \ + "mtc2 %0, $18" \ + : \ + : "r"( r0 ) ) + +/* Get the SZ2 Z result register from the GTE */ +#define gte_getsz2( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $19;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/* Get the SZ Z result register from the GTE */ +#define gte_getsz gte_getsz2 + +/* Set the SZ2 Z register in the GTE */ +#define gte_ldsz2( r0 ) __asm__ volatile ( \ + "mtc2 %0, $19" \ + : \ + : "r"( r0 ) ) + +/******************************************************************************/ + +/* Set the IR0 register in the GTE */ +#define gte_ldir0( r0 ) gte_lddp( r0 ) +/* Read the IR0 register in the GTE */ +#define gte_stir0( r0 ) gte_stdp( r0 ) + +/* Get the IR0 register from the GTE */ +#define gte_getir0( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $8;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) +#define gte_getdp( ) gte_getir0( ) + +/* Set the IR1 register in the GTE */ +#define gte_ldir1( r0 ) __asm__ volatile ( \ + "mtc2 %0, $9" \ + : \ + : "r"( r0 ) ) + +/* Read the IR1 register in the GTE */ +#define gte_stir1( r0 ) __asm__ volatile ( \ + "swc2 $9, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +/* Get the IR1 register from the GTE */ +#define gte_getir1( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $9;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/* Set the IR2 register in the GTE */ +#define gte_ldir2( r0 ) __asm__ volatile ( \ + "mtc2 %0, $10" \ + : \ + : "r"( r0 ) ) + +/* Read the IR2 register in the GTE */ +#define gte_stir2( r0 ) __asm__ volatile ( \ + "swc2 $10, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +/* Get the IR2 register from the GTE */ +#define gte_getir2( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $10;" \ + : "=r"( r0 ) \ + : \ 
+ ); \ + r0; }) + +/* Set the IR3 register in the GTE */ +#define gte_ldir3( r0 ) __asm__ volatile ( \ + "mtc2 %0, $11" \ + : \ + : "r"( r0 ) ) + +/* Read the IR3 register in the GTE */ +#define gte_stir3( r0 ) __asm__ volatile ( \ + "swc2 $11, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + + +/* Get the IR3 register from the GTE */ +#define gte_getir3( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $11;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/******************************************************************************/ + +/* Read the MAC0 register in the GTE */ +#define gte_stmac0( r0 ) gte_stopz( r0 ) + +/* Get the MAC0 register from the GTE */ +#define gte_getmac0( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $24;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) +#define gte_getopz( ) gte_getmac0( ) + +/* Read the MAC1 register in the GTE */ +#define gte_stmac1( r0 ) gte_stlvnl0( r0 ) + +/* Get the MAC1 register from the GTE */ +#define gte_getmac1( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $25;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) +#define gte_getlvnl0( ) gte_getmac1( ) + +/* Read the MAC2 register in the GTE */ +#define gte_stmac2( r0 ) gte_stlvnl1( r0 ) + +/* Get the MAC2 register from the GTE */ +#define gte_getmac2( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $26;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) +#define gte_getlvnl1( ) gte_getmac2( ) + +/* Read the MAC3 register in the GTE */ +#define gte_stmac3( r0 ) gte_stlvnl2( r0 ) + +/* Get the MAC3 register from the GTE */ +#define gte_getmac3( ) \ + ({ long r0; \ + __asm__ volatile ( \ + "mfc2 %0, $27;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) +#define gte_getlvnl2( ) gte_getmac3( ) + +/******************************************************************************/ +/******************************************************************************/ + +/* Load the Screen XY registers from memory */ +/* This is passed the base pointer register and the actual 
pointer as pairs for + each vertex +*/ +/* Setup vertex 0 only */ +#define gte_ldsxy0_memory(b0,a0) INTERNAL_gte_ldsxy0_memory( b0,((ULONG)a0)-((ULONG)b0) ) + +#define INTERNAL_gte_ldsxy0_memory(r0,r1) __asm__ volatile ( \ + "lwc2 $12, %1( %0 )" \ + : \ + : "r"( r0 ), "i"( r1 ) ) + +/* Setup all 3 vertices */ +#define gte_ldsxy3_memory(b0,a0,b1,a1,b2,a2) INTERNAL_gte_ldsxy3_memory( \ + b0,((ULONG)a0)-((ULONG)b0), \ + b1,((ULONG)a1)-((ULONG)b1), \ + b2,((ULONG)a2)-((ULONG)b2) ) + +#define INTERNAL_gte_ldsxy3_memory(r0,r1,r2,r3,r4,r5) __asm__ volatile ( \ + "lwc2 $12, %1( %0 );" \ + "lwc2 $13, %3( %2 );" \ + "lwc2 $14, %5( %4 )" \ + : \ + : "r"( r0 ), "i"( r1 ), "r"( r2 ), "i"( r3 ), "r"( r4 ), "i"( r5 ) ) + + +/* Store all 3 vertices in RAM */ +#define gte_stsxy3_memory(b0,a0,b1,a1,b2,a2) INTERNAL_gte_stsxy3_memory( \ + b0,((ULONG)a0)-((ULONG)b0), \ + b1,((ULONG)a1)-((ULONG)b1), \ + b2,((ULONG)a2)-((ULONG)b2) ) + +#define INTERNAL_gte_stsxy3_memory(r0,r1,r2,r3,r4,r5) __asm__ volatile ( \ + "swc2 $12, %1( %0 );" \ + "swc2 $13, %3( %2 );" \ + "swc2 $14, %5( %4 )" \ + : \ + : "r"( r0 ), "i"( r1 ), "r"( r2 ), "i"( r3 ), "r"( r4 ), "i"( r5 ) ) + + +/* Load the Screen Z registers from memory */ +/* This is passed the base pointer register and the actual pointer as pairs for + each vertex +*/ +/* Setup all 3 values for Z Average 3 */ +#define gte_ldsz3_memory(b0,a0,b1,a1,b2,a2) INTERNAL_gte_ldsz3_memory( \ + b0,((ULONG)a0)-((ULONG)b0), \ + b1,((ULONG)a1)-((ULONG)b1), \ + b2,((ULONG)a2)-((ULONG)b2) ) + +#define INTERNAL_gte_ldsz3_memory(r0,r1,r2,r3,r4,r5) __asm__ volatile ( \ + "lwc2 $17, %1( %0 );" \ + "lwc2 $18, %3( %2 );" \ + "lwc2 $19, %5( %4 )" \ + : \ + : "r"( r0 ), "i"( r1 ), "r"( r2 ), "i"( r3 ), "r"( r4 ), "i"( r5 ) ) + +/* Setup all 4 values for Z Average 4 */ +#define gte_ldsz4_memory(b0,a0,b1,a1,b2,a2,b3,a3) INTERNAL_gte_ldsz4_memory( \ + b0,((ULONG)a0)-((ULONG)b0), \ + b1,((ULONG)a1)-((ULONG)b1), \ + b2,((ULONG)a2)-((ULONG)b2), \ + b3,((ULONG)a3)-((ULONG)b3) ) 
+ +#define INTERNAL_gte_ldsz4_memory(r0,r1,r2,r3,r4,r5,r6,r7) __asm__ volatile ( \ + "lwc2 $16, %1( %0 );" \ + "lwc2 $17, %3( %2 );" \ + "lwc2 $18, %5( %4 );" \ + "lwc2 $19, %7( %6 )" \ + : \ + : "r"( r0 ), "i"( r1 ), "r"( r2 ), "i"( r3 ), "r"( r4 ), "i"( r5 ), "r"( r6 ), "i"( r7 ) ) + +/******************************************************************************/ +/******************************************************************************/ + +/* Setup the input vector registers */ + +/* Set the XY0 register in the GTE from a register. X in lower 16 bits, Y in upper 16 */ +#define gte_ldv0XY_reg( r0 ) __asm__ volatile ( \ + "mtc2 %0, $0" \ + : \ + : "r"( r0 ) ) + +/* Set the Z0 register in the GTE from a register. */ +#define gte_ldv0Z_reg( r0 ) __asm__ volatile ( \ + "mtc2 %0, $1" \ + : \ + : "r"( r0 ) ) + +/* Set the Z0 register in the GTE from memory. */ +#define gte_ldv0Z( r0 ) __asm__ volatile ( \ + "lwc2 $1, 0( %0 )" \ + : \ + : "r"( r0 ) ) + +/* Set the XY1 register in the GTE from a register. X in lower 16 bits, Y in upper 16 */ +#define gte_ldv1XY_reg( r0 ) __asm__ volatile ( \ + "mtc2 %0, $2" \ + : \ + : "r"( r0 ) ) + +/* Set the Z1 register in the GTE from a register. */ +#define gte_ldv1Z_reg( r0 ) __asm__ volatile ( \ + "mtc2 %0, $3" \ + : \ + : "r"( r0 ) ) + +/* Set the Z1 register in the GTE from memory. */ +#define gte_ldv1Z( r0 ) __asm__ volatile ( \ + "lwc2 $3, 0( %0 )" \ + : \ + : "r"( r0 ) ) + +/* Set the XY2 register in the GTE from a register. X in lower 16 bits, Y in upper 16 */ +#define gte_ldv2XY_reg( r0 ) __asm__ volatile ( \ + "mtc2 %0, $4" \ + : \ + : "r"( r0 ) ) + +/* Set the Z2 register in the GTE from a register. */ +#define gte_ldv2Z_reg( r0 ) __asm__ volatile ( \ + "mtc2 %0, $5" \ + : \ + : "r"( r0 ) ) + +/* Set the Z2 register in the GTE from memory. 
*/ +#define gte_ldv2Z( r0 ) __asm__ volatile ( \ + "lwc2 $5, 0( %0 )" \ + : \ + : "r"( r0 ) ) + + +/******************************************************************************/ +/******************************************************************************/ + + +/* Clear the GTE translation matrix to 0,0,0 */ +#define gte_ClearTransMatrix( ) __asm__ volatile ( \ + "ctc2 $0, $5;" \ + "ctc2 $0, $6;" \ + "ctc2 $0, $7" \ + : ) + + +/******************************************************************************/ +/******************************************************************************/ + +/* Setup the 3 matrix components from a VECTOR structure. Used for Outer Product + calculations and interpolations +*/ +#define gte_SetMatrix012(r0) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $0;" \ + "lw $12, 8( %0 );" \ + "ctc2 $13, $2;" \ + "ctc2 $12, $4;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13" ) + +/* Setup the IR1,IR2,IR3 components from a VECTOR structure. Used for Outer + Product calculations and interpolations +*/ +#define gte_ldir123(r0) __asm__ volatile ( \ + "lwc2 $9, 0( %0 );" \ + "lwc2 $10, 4( %0 );" \ + "lwc2 $11, 8( %0 )" \ + : \ + : "r"( r0 ) ) + +/* Read the IR1,IR2,IR3 registers into a VECTOR structure. */ +#define gte_stir123( r0 ) __asm__ volatile ( \ + "swc2 $9, 0( %0 );" \ + "swc2 $10, 4( %0 );" \ + "swc2 $11, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +/* Read the MAC1,MAC2,MAC3 registers into a VECTOR structure. 
*/ +#define gte_stmac123( r0 ) __asm__ volatile ( \ + "swc2 $25, 0( %0 );" \ + "swc2 $26, 4( %0 );" \ + "swc2 $27, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + + +/******************************************************************************/ +/******************************************************************************/ + + +/* This is the same as MulMatrix, except that is uses the matrix already + setup in the GTE +*/ +#define gte_MulMatrix0AlreadySetup(r2,r3) \ + { \ + gte_ldclmv(r2); \ + gte_rtir(); \ + gte_stclmv(r3); \ + gte_ldclmv((char*)r2+2); \ + gte_rtir(); \ + gte_stclmv((char*)r3+2); \ + gte_ldclmv((char*)r2+4); \ + gte_rtir(); \ + gte_stclmv((char*)r3+4); \ + } + + +/* This is the same as CompMatrix, except that is uses the matrix already + setup in the GTE +*/ +#define gte_CompMatrixAlreadySetup(r2,r3) \ + { \ + gte_MulMatrix0AlreadySetup(r2,r3); \ + gte_ldlv0((char*)r2+20); \ + gte_rt(); \ + gte_stlvl((char*)r3+20); \ + } + + +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ + + +/* OK, OK!! I know this should be somewhere else, but I currently only need it here!! 
*/ + +/* Multiply 2 signed 32 bit numbers for a 64 bit result and return the upper 32 bits */ +/* long mul64u32(long a, long b) */ +#define mul64u32(a,b) \ + ({ long r0,r1=a,r2=b; \ + __asm__ volatile ( \ + "mult %1, %2;" \ + "mfhi %0;" \ + : "=r"( r0 ) \ + : "r"( r1 ), "r"( r2 ) \ + ); \ + r0; }) + +/* Multiply 2 unsigned 32 bit numbers for a 64 bit result and return the upper 32 bits */ +/* long mulu64u32(unsigned long a, unsigned long b) */ +#define mulu64u32(a,b) \ + ({ long r0,r1=a,r2=b; \ + __asm__ volatile ( \ + "multu %1, %2;" \ + "mfhi %0;" \ + : "=r"( r0 ) \ + : "r"( r1 ), "r"( r2 ) \ + ); \ + r0; }) + +/****************/ + +/* Individual component instructions to start multiplies and read the results */ + +/* void start_SignedMultiply(long a, long b) */ +#define start_SignedMultiply(r0,r1) \ + __asm__ volatile ( \ + "mult %0, %1;" \ + : \ + : "r"( r0 ), "r"( r1 ) \ + ); \ + +/* void start_UnsignedMultiply(long a, long b) */ +#define start_UnsignedMultiply(r0,r1) \ + __asm__ volatile ( \ + "multu %0, %1;" \ + : \ + : "r"( r0 ), "r"( r1 ) \ + ); \ + +/* long get_MultiplyHigh(void) */ +#define get_MultiplyHigh() \ + ({ long r0; \ + __asm__ volatile ( \ + "mfhi %0;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/* long get_MultiplyLow(void) */ +#define get_MultiplyLow() \ + ({ long r0; \ + __asm__ volatile ( \ + "mflo %0;" \ + : "=r"( r0 ) \ + : \ + ); \ + r0; }) + +/****************/ + +#if 0 +/* Multiply 2 32 bit numbers for a 64 bit result and store the upper 32 bits */ +/* void mul64u32(long *dest,long a, long b) */ +#define mul64u32pointer( r0, r1, r2 ) __asm__ volatile ( \ + "mult %1, %2;" \ + "mfhi $12;" \ + "sw $12, 0( %0 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "$12", "memory" ) + +/* Multiply 2 32 bit numbers for a 64 bit result and store the upper 32 bits */ +/* void mul64u32(long dest,long a, long b) */ +#define mul64u32value( r0, r1, r2 ) __asm__ volatile ( \ + "mult %1, %2;" \ + "mfhi %0;" \ + : "=r"( r0 ) \ + : "r"( r1 ), "r"( r2 ) 
\ + ) +#endif + +/******************************************************************************/ +/******************************************************************************/ + +/* This is passed a value, a base pointer register and the actual pointer value + to be read. + These are used to forcefully override the compiler's desire to move memory + reads later in the program until they are needed instead of leaving them + where they are which can actually be better in terms of not having a delay + slot while waiting for the read, or by being able to read early while the + memory bus is clear. ZZKJ +*/ +#define read_LONG(v,b0,a0) INTERNAL_read_LONG(v, b0,((ULONG)a0)-((ULONG)b0) ) +#define read_SLONG(v,b0,a0) INTERNAL_read_LONG(v, b0,((ULONG)a0)-((ULONG)b0) ) +#define read_ULONG(v,b0,a0) INTERNAL_read_LONG(v, b0,((ULONG)a0)-((ULONG)b0) ) +#define read_SWORD(v,b0,a0) INTERNAL_read_SWORD(v, b0,((ULONG)a0)-((ULONG)b0) ) +#define read_UWORD(v,b0,a0) INTERNAL_read_UWORD(v, b0,((ULONG)a0)-((ULONG)b0) ) +#define read_SBYTE(v,b0,a0) INTERNAL_read_SBYTE(v, b0,((ULONG)a0)-((ULONG)b0) ) +#define read_UBYTE(v,b0,a0) INTERNAL_read_UBYTE(v, b0,((ULONG)a0)-((ULONG)b0) ) + +#define INTERNAL_read_LONG(r0,r1,r2) \ + __asm__ volatile ( \ + "lw %0, %2( %1 )" \ + : "=r"( r0 ) \ + : "r"( r1 ), "i"( r2 ) \ + ) + +#define INTERNAL_read_SWORD(r0,r1,r2) \ + __asm__ volatile ( \ + "lh %0, %2( %1 )" \ + : "=r"( r0 ) \ + : "r"( r1 ), "i"( r2 ) \ + ) + +#define INTERNAL_read_UWORD(r0,r1,r2) \ + __asm__ volatile ( \ + "lhu %0, %2( %1 )" \ + : "=r"( r0 ) \ + : "r"( r1 ), "i"( r2 ) \ + ) + +#define INTERNAL_read_SBYTE(r0,r1,r2) \ + __asm__ volatile ( \ + "lb %0, %2( %1 )" \ + : "=r"( r0 ) \ + : "r"( r1 ), "i"( r2 ) \ + ) + +#define INTERNAL_read_UBYTE(r0,r1,r2) \ + __asm__ volatile ( \ + "lbu %0, %2( %1 )" \ + : "=r"( r0 ) \ + : "r"( r1 ), "i"( r2 ) \ + ) + + +/******************************************************************************/ + + +#endif /* __GTEMISC_H__ */