; ------------------------------------------------------
; INCA
; A 1 kilobyte effect (958 bytes actually)
; for the Neo Geo Pocket console (monochrome and color)
; ------------------------------------------------------
; Written by Franck "hitchhikr" Charlet
; ------------------------------------------------------

; ------------------------------------------------------
; Assembler mumbo jumbo
; ------------------------------------------------------
                    $maximum

prog_header         section code large

                    ; Make sure we are known from the ROM header
                    public  startup

; ------------------------------------------------------
; Includes
; ------------------------------------------------------
                    $include "ngp.inc"

; ------------------------------------------------------
; RAM constants
; ------------------------------------------------------
SINCOS_LEN          equ     256                                     ; number of 16-bit values produced by create_sincos

; create_sincos stores every value three times, (SINCOS_LEN * 2) bytes apart:
; as is, negated, then as is again. The cosine is the same table read
; SINCOS_LEN bytes (128 entries) further.
SINCOS_TABLE_SIN    equ     WORK_RAM
SINCOS_TABLE_COS    equ     SINCOS_TABLE_SIN + (SINCOS_LEN)         ; total length: 1536 bytes

BACK_BUFFER         equ     SINCOS_TABLE_COS + (SINCOS_LEN * 11)    ; off-screen tile bitmap, copied to TILE_RAM each frame

POINTS_2D           equ     BACK_BUFFER + 7000                      ; 8 projected points, two words each

MEM_TOP             equ     0x6f00                                  ; end of the RAM area zeroed at startup

; ------------------------------------------------------
; ROM data
; ------------------------------------------------------
; Cube vertices: 8 points of 3 signed bytes each, read by move_points as
; x, y, z in that order.
points_3d:
                    db      -44,  44,  44
                    db       44,  44,  44
                    db       44, -44,  44
                    db      -44, -44,  44
                    db      -44,  44, -44
                    db       44,  44, -44
                    db       44, -44, -44
                    db      -44, -44, -44
; Cube edges: 12 pairs of indices into points_3d / POINTS_2D.
lines:
                    db      0, 1
                    db      1, 2
                    db      2, 3
                    db      3, 0
                    db      4, 5
                    db      5, 6
                    db      6, 7
                    db      7, 4
                    db      0, 4
                    db      1, 5
                    db      2, 6
                    db      3, 7
; Indexed by (x >> 3) in draw_line: tile column + 1 (the tile grid built at
; startup is numbered from 1). Only 14 columns are present; the remaining
; entries are commented out, so draw_line must not be given x >= 112.
table_pixels_tiles:
                    db      0 + 1, 1 + 1, 2 + 1, 3 + 1, 4 + 1, 5 + 1, 6 + 1, 7 + 1
                    db      8 + 1, 9 + 1, 10 + 1, 11 + 1, 12 + 1, 13 + 1;, 14 + 1;, 15 + 1
                    ;db      16 + 1, 17 + 1, 18 + 1, 19 + 1
; Indexed by (x & 7): which of the two bytes of a tile row holds the pixel
; (byte 1 for the four left pixels, byte 0 for the four right ones).
table_pixels_pos_nibble:
                    db      1, 1, 1, 1
                    db      0, 0, 0, 0
; Indexed by (x & 3): the bit ORed into that byte (upper bit of each 2-bit pixel).
table_pixels:
                    db      0x80, 0x20, 0x8, 0x2

; ------------------------------------------------------
; Draw a line on screen (optimized for size)
; ------------------------------------------------------
; Bresenham line between (X1, Y1) and (X2, Y2), plotted into BACK_BUFFER.
;
; The caller pushes four words and uses calr; the caller also removes them.
; Byte offsets from XSP once the 4 bytes of locals are reserved:
;   +0   DX   abs(x2 - x1)
;   +2   SX   x step, +1 or -1
;   +4        4 bytes left by the call (return address)
;   +8   X1   word pushed last; also used as the running x (modified in place)
;   +10  Y1
;   +12  X2
;   +14  Y2   word pushed first
; Inside the loop: IZ = running y, HL = DY, DE = SY, IX = ERR.
; Overwrites WA, BC, DE, HL, IX, IY and IZ; nothing is saved here.
; x >> 3 indexes table_pixels_tiles, which only has 14 entries.
DX                  equ     0
SX                  equ     2
X1                  equ     8
Y1                  equ     10
X2                  equ     12
Y2                  equ     14
draw_line:
                    dec     4, XSP                                  ; reserve DX and SX
                    ld      WA, (XSP + X2)
                    sub     WA, (XSP + X1)
                    j       ge, delta_x
                    neg     WA                                      ; abs
delta_x:
                    ld      (XSP), WA                               ; DX
                    ld      BC, -1
                    ld      WA, (XSP + X1)
                    cp      WA, (XSP + X2)
                    j       ge, step_x
                    neg     BC                                      ; x1 < x2: SX = +1
step_x:
                    ld      (XSP + SX), BC
                    ld      IZ, (XSP + Y1)                          ; IZ = running y
                    ld      HL, (XSP + Y2)
                    sub     HL, IZ
                    j       ge, delta_y
                    neg     HL                                      ; abs
delta_y:
                    ld      DE, -1                                  ; SY
                    cp      IZ, (XSP + Y2)
                    j       ge, step_y
                    neg     DE                                      ; SY
step_y:
                    cp      (XSP), HL                               ; DX
                    j       le, neg_err                             ; DX <= DY: ERR starts from -DY
                    ld      IX, (XSP)                               ; DX
                    j       pos_err
neg_err:
                    ld      IX, HL                                  ; DY
                    neg     IX
pos_err:
                    sra     1, IX                                   ; ERR = (DX > DY ? DX : -DY) / 2
line_loop:
                    ; Byte address of the pixel in the back buffer:
                    ; tile = table_pixels_tiles[x1 >> 3] + ((y1 >> 3) * 20)
                    ld      IY, IZ
                    srl     3, IY
                    mul     IY, 20                                  ; next line (probably slow)
                    ld      WA, (XSP + X1)
                    srl     3, WA
                    lda     XBC, table_pixels_tiles
                    ld      A, (XBC + WA)
                    extz    WA
                    add     WA, IY
                    ld      BC, WA
                    sll     4, BC                                   ; tile * 16 bytes
                    ld      IY, IZ
                    and     XIY, 0x7
                    add     XIY, XIY                                ; (y1 & 7) * 2: two bytes per tile row
                    add     IY, BC
                    ld      WA, (XSP + X1)
                    and     WA, 0x7                                 ; [table_pixels_pos_nibble[x1 & 7]]
                    lda     XBC, table_pixels_pos_nibble
                    ld      A, (XBC + WA)
                    extz    WA
                    add     IY, WA
                    add     XIY, BACK_BUFFER
                    ld      WA, (XSP + X1)
                    and     WA, 0x3                                 ; |= table_pixels[x1 & 3];
                    lda     XBC, table_pixels
                    ld      A, (XBC + WA)
                    or      (XIY), A
                    ; Stop once the end point itself has been plotted
                    ld      WA, (XSP + X1)
                    cp      WA, (XSP + X2)
                    j       ne, check_end_of_line
                    cp      IZ, (XSP + Y2)
                    j       eq, line_done                           ; break;
check_end_of_line:
                    ld      BC, IX                                  ; E2 = ERR before any update
                    ld      WA, (XSP)                               ; DX
                    neg     WA
                    cp      IX, WA
                    j       le, inc_x                               ; ERR <= -DX: no x step
                    sub     IX, HL                                  ; DY
                    ld      WA, (XSP + SX)
                    add     (XSP + X1), WA                          ; x1 += SX
inc_x:
                    cp      BC, HL                                  ; E2
                    j       ge, line_loop                           ; E2 >= DY: no y step
                    add     IX, (XSP)                               ; DX
                    add     IZ, DE                                  ; SY
                    j       line_loop
line_done:
                    inc     4, XSP                                  ; release DX and SX
                    ret

; ------------------------------------------------------
; Horizontal blank interrupt
; ------------------------------------------------------
; Handler installed at HBL_INT by startup.
; Writes 0 to CONTROL_2D, then replaces it with NEG_2D when the raster line
; read from RAS_Y is 76 or more (76 = half of the 152 lines set in WIN_WH).
; NOTE(review): NEG_2D presumably selects the negative display; confirm in ngp.inc.
hbl:
                    ldb     (CONTROL_2D), 0
                    cpb     (RAS_Y), 76                             ; reaching the middle of the screen
                    j       ult, change_polarity                    ; unsigned RAS_Y < 76: keep the 0
                    ldb     (CONTROL_2D), NEG_2D
change_polarity:
                    reti

; ------------------------------------------------------
; Vertical blank interrupt
; ------------------------------------------------------
; Handler installed at VBL_INT by startup; its only job is to write
; WATCHDOG_CLEAR to the WATCHDOG register.
vbl:
                    ldb     (WATCHDOG), WATCHDOG_CLEAR
                    reti

; ------------------------------------------------------
; Program entry point
; ------------------------------------------------------
; Entry point (declared public for the ROM header). Runs with interrupts
; disabled while it:
;   - sets the display window and zeroes tile RAM and work RAM,
;   - fills SCR1_RAM with a 20 x 19 grid of consecutive tile numbers from 1,
;   - generates the sine table,
;   - installs the hbl and vbl handlers,
; then enables interrupts and falls through into main_loop.
startup:
                    di

                    ; Mandatory (152/160 pixels)
                    ldw     (WIN_WH), 0x98a0                        ; 0xa0 = 160, 0x98 = 152

                    ; Clear the tiles
                    lda     XIX, TILE_RAM
                    ld      BC, 8192 / 4                            ; 8192 bytes, 4 per store
                    xor     XWA, XWA
clear_tiles:
                    ld      (XIX+), XWA
                    djnz    BC, clear_tiles

                    ; Clear the 900h RAM (XWA is still 0)
                    lda     XIX, WORK_RAM
                    ld      BC, (MEM_TOP - WORK_RAM) / 4
clear_work_ram:
                    ld      (XIX+), XWA
                    djnz    BC, clear_work_ram

                    ; Create the tiles grid
                    ; IX = tile number, BC = byte offset of the row (64 per row),
                    ; XDE = byte offset inside the row (2 per cell), IZ = cells left
                    ld      IX, 1
                    xor     BC, BC
create_grid_y:
                    ld      IZ, 20
                    xor     XDE, XDE
create_grid_x:
                    lda     XIY, XDE + BC
                    lda     XHL, SCR1_RAM
                    add     XHL, XIY
                    ld      A, IXL
                    ld      (XHL+), A                               ; chr
                    cp      IX, 256
                    scc     ge, A                                   ; A = 1 once the tile number needs a 9th bit
                    ld      (XHL), A                                ; prop
                    inc     1, IX
                    inc     2, XDE
                    djnz    IZ, create_grid_x
                    add     BC, 64
                    cp      BC, 19 * 64
                    jr      lt, create_grid_y

                    ; Create the sincos table
                    ; XIY accumulates the value (its upper word is what gets stored),
                    ; XDE is the increment added after every entry. Each pass subtracts
                    ; from XDE the upper 32 bits of (0x9de80 << k) for every bit k set
                    ; in XIY, so the increment shrinks as the value grows.
                    ; NOTE(review): calc_curve only ends when the arithmetic shifts bring
                    ; XWA to 0, so XIY must stay non-negative for all SINCOS_LEN entries.
                    ld      XDE, 0x3211a
                    xor     XIY, XIY
                    lda     XIX, SINCOS_TABLE_SIN
                    ld      BC, SINCOS_LEN
create_sincos:
                    rlc     0, XIY                                  ; actually a 16 bits shift
                    ld      IZ, IY                                  ; IY now holds the former upper word
                    ld      (XIX+), IZ
                    ld      (XIX + (SINCOS_LEN * 4) - 2), IZ        ; same value 1024 bytes further
                    neg     IZ
                    ld      (XIX + (SINCOS_LEN * 2) - 2), IZ        ; negated value 512 bytes further
                    rlc     0, XIY                                  ; rotate back
                    ld      XWA, XIY
                    ld      XHL, 0x9de80
                    xor     XIZ, XIZ
calc_curve:
                    or      XWA, XWA
                    jr      eq, done_curve
                    sra     1, XWA                                  ; carry = next bit of the value
                    jr      nc, next_value
                    sub     XDE, XIZ
next_value:
                    add     XHL, XHL                                ; XIZ:XHL <<= 1
                    adc     XIZ, XIZ
                    jr      calc_curve
done_curve:
                    add     XIY, XDE
                    djnz    BC, create_sincos

                    ; Set the color level of the cube
                    ldb     (SCR1_LUT + 2), 7

                    ; Install the horizontal blank interrupt
                    ldb     (T01MOD), 0                             ; Timer 0, 1 are in <8 bit timer mode>
                    ldb     RW3, VECT_INTLVSET
                    ldb     RB3, 3                                  ; Interrupt level
                    ldb     RC3, 2                                  ; Interrupt request from 8 bit timer 0
                    swi     1                                       ; presumably the system call taking the three values above; confirm against the BIOS documentation
                    lda     XWA, hbl
                    ld      (HBL_INT), XWA
                    set     0, (TRUN)                               ; Start count of timer 0

                    ; Install the vertical blank interrupt
                    lda     XWA, vbl
                    ld      (VBL_INT), XWA

                    ei

; ------------------------------------------------------
; Loop of the program
; ------------------------------------------------------
; Start of every frame: wipe the part of the back buffer the cube is drawn
; into, advance the four animation phases and fetch the values that depend
; on them.
;
; The phases are words kept above the stack pointer, each a byte offset
; into the sine table (the 0x3fe mask keeps them even and below 0x400):
;   (XSP+8) move_x   (XSP+10) move_y   (XSP+12) alpha   (XSP+14) beta
; On exit (XSP), (XSP+2), (XSP+4), (XSP+6) hold sin(alpha), cos(alpha),
; sin(beta), cos(beta) for move_points.
; Nothing reserves these 16 bytes: the code uses whatever lies at the stack
; pointer it was started with.
main_loop:
                    ;ld      A, (STATUS_2D)
                    ;and     A, 0x40
                    ;jr      eq, main_loop

                    ; Clear the drawing buffer (75 passes of 8 long stores = 2400 bytes)
                    lda     XIX, BACK_BUFFER + 2000
                    ld      BC, 600 / 8
                    xor     XWA, XWA
clear_screen:
                    ld      (XIX+), XWA
                    ld      (XIX+), XWA
                    ld      (XIX+), XWA
                    ld      (XIX+), XWA
                    ld      (XIX+), XWA
                    ld      (XIX+), XWA
                    ld      (XIX+), XWA
                    ld      (XIX+), XWA
                    djnz    BC, clear_screen

                    ; XWA = sine table base; it is not written again in this block,
                    ; so every lookup below shares this single load
                    lda     XWA, SINCOS_TABLE_SIN
                    ld      BC, (XSP+8)                             ; move_x
                    inc     8, BC
                    and     BC, 0x3fe
                    ld      (XSP+8), BC
                    ld      HL, (XWA + BC)
                    sra     3, HL
                    dec     5, HL
                    ld      (SCR1_X), L                             ; low byte of (sin / 8) - 5

                    ld      BC, (XSP+10)                            ; move_y
                    inc     4, BC
                    and     BC, 0x3fe
                    ld      (XSP+10), BC
                    ld      HL, (XWA + BC)
                    sra     3, HL
                    inc     3, HL
                    ld      (SCR1_Y), L                             ; low byte of (sin / 8) + 3

                    ld      BC, (XSP+12)                            ; alpha
                    inc     4, BC
                    and     BC, 0x3fe
                    ld      (XSP+12), BC
                    ld      DE, (XWA + BC)                          ; sin(alpha)
                    ld      (XSP), DE
                    lda     XHL, SINCOS_TABLE_COS
                    ld      DE, (XHL + BC)                          ; cos(alpha)
                    ld      (XSP + 2), DE

                    ld      BC, (XSP+14)                            ; beta
                    inc     8, BC
                    and     BC, 0x3fe
                    ld      (XSP+14), BC
                    ld      DE, (XWA + BC)                          ; sin(beta)
                    ld      (XSP + 4), DE
                    ld      DE, (XHL + BC)                          ; cos(beta)
                    ld      (XSP + 6), DE

                    ; Rotate and project the 8 cube vertices into POINTS_2D.
                    ; Expects (XSP), (XSP+2), (XSP+4), (XSP+6) to hold
                    ; sin(alpha), cos(alpha), sin(beta), cos(beta).
                    ; IZ = vertex index. Only the low word of each product is kept.
                    xor     IZ, IZ
move_points:
                    lda     XWA, points_3d
                    ld      HL, IZ
                    mul     HL, 3                                   ; 3 bytes per vertex
                    add     WA, HL
                    ld      E, (XWA+)                               ; x
                    ld      D, (XWA+)                               ; y
                    ld      C, (XWA)                                ; z
                    exts    BC

                    ; First rotation (alpha), on x and z:
                    ;   HL = (x * sin + z * cos) >> 8
                    ;   BC = (z * sin - x * cos) >> 8
                    ld      A, E                                    ; point_x
                    exts    WA
                    muls    XWA, (XSP)
                    ld      HL, WA
                    ld      WA, BC                                  ; point_z
                    muls    XWA, (XSP)
                    ld      IY, WA
                    ld      A, E
                    exts    WA
                    muls    XWA, (XSP + 2)
                    ld      IX, WA
                    ld      WA, BC
                    muls    XWA, (XSP + 2)
                    add     HL, WA
                    sra     8,  HL
                    sub     IY, IX
                    sra     8,  IY
                    ld      BC, IY

                    ; Second rotation (beta), on the low byte of the rotated x and on y:
                    ;   HL = x' * sin + y * cos
                    ;   IY = y * sin - x' * cos
                    ; (no >> 8 here: the division below is applied to these values directly)
                    ld      E, L                                    ; keep x' as a byte
                    exts    HL
                    muls    XHL, (XSP + 4)
                    ld      A, D                                    ; point_y
                    exts    WA
                    muls    XWA, (XSP + 4)
                    ld      IY, WA
                    ld      A, E
                    exts    WA
                    muls    XWA, (XSP + 6)
                    ld      IX, WA
                    ld      A, D
                    exts    WA
                    muls    XWA, (XSP + 6)
                    add     HL, WA
                    sub     IY, IX

                    ; Perspective: divide both values by (rotated z + 600)
                    add     BC, 600
                    exts    XHL
                    divs    XHL, BC
                    exts    XIY
                    divs    XIY, BC

                    add     HL, 160 / 2
                    add     IY, 152 / 2

                    ; POINTS_2D[IZ] = { HL, IY }, 4 bytes per point
                    ld      DE, IZ
                    sll     2, DE
                    lda     XWA, POINTS_2D
                    add     WA, DE
                    ld      (XWA+), HL
                    ld      (XWA), IY

                    inc     1, IZ
                    cp      IZ, 8
                    jrl     lt, move_points

                    ; Draw the 12 edges listed in `lines`. IZ = edge index; it is
                    ; saved around the call because draw_line overwrites it.
                    ; NOTE(review): the word pushed last (second word of the line_2
                    ; point) is the one draw_line reads as X1, and the first word of
                    ; each point is read as a Y. draw_line's X/Y are therefore the
                    ; reverse of the .x/.y labels below; confirm this is intended.
                    xor     IZ, IZ
draw_lines:
                    push    IZ

                    lda     XWA, lines
                    add     IZ, IZ                                  ; 2 bytes per edge
                    add     WA, IZ
                    ld      E, (XWA+)                               ; line_1
                    ld      L, (XWA)                                ; line_2
                    extz    DE
                    extz    HL
                    sll     2, DE                                   ; 4 bytes per 2D point
                    sll     2, HL

                    lda     XWA, POINTS_2D

                    add     WA, DE
                    pushw   (XWA)                                   ; points_2d[line_1].x
                    pushw   (XWA + 2)                               ; points_2d[line_1].y
                    sub     WA, DE                                  ; nullify the add
                    add     WA, HL
                    pushw   (XWA)                                   ; points_2d[line_2].x
                    pushw   (XWA + 2)                               ; points_2d[line_2].y

                    calr    draw_line
                    inc     8, XSP                                  ; drop the four argument words

                    pop     IZ

                    inc     1,  IZ
                    cp      IZ, 12
                    jr      lt, draw_lines

                    ; Writing to VRAM is EXTREMELY slow
                    ; Only part of the back buffer is transferred: 81 passes of
                    ; 8 long words (2592 bytes) starting 32 * 60 bytes in, to the
                    ; same offset of TILE_RAM. Then the frame loop starts over.
                    lda     XHL, BACK_BUFFER + (32 * 60)
                    lda     XDE, TILE_RAM + (32 * 60)
                    ld      BC, 650 / 8
copy_buffer:
                    ld      XWA, (XHL+)
                    ld      (XDE+), XWA
                    ld      XWA, (XHL+)
                    ld      (XDE+), XWA
                    ld      XWA, (XHL+)
                    ld      (XDE+), XWA
                    ld      XWA, (XHL+)
                    ld      (XDE+), XWA
                    ld      XWA, (XHL+)
                    ld      (XDE+), XWA
                    ld      XWA, (XHL+)
                    ld      (XDE+), XWA
                    ld      XWA, (XHL+)
                    ld      (XDE+), XWA
                    ld      XWA, (XHL+)
                    ld      (XDE+), XWA
                    djnz    BC, copy_buffer

                    j       main_loop

                    end
