{$R-}
Program AsmWater; Uses CRT;
var
  { Water height field. ShowWater addresses it as two halves that lie
    128000 bytes apart and swaps their roles according to its argument. }
         WaterBuf : array[ 0..127999 ] of Integer; -- two buffers in one
  { DatBuf receives the picture read by LoadBMP; BufScr is the buffer that
    VirScr points at and that is handed to FlipScreen each frame. }
   DatBuf, BufScr : array[ 0..64000 ] of byte; -- picture and screen buffers
  { 256 colour entries of three bytes each, filled by LoadBMP and sent to
    the video hardware by SetPal. }
           paleta : array[ 1..768 ] of byte;
  { t1 / t2   : timer value at the start of the loop / elapsed timer ticks.
    frame     : number of frames drawn, used for the frames-per-second line.
    WaterAdr, VirScr, DatAdr : results of Ofs() on WaterBuf, BufScr and
                DatBuf, set at program start and read by the assembly code. }
           t2, t1,
  frame, WaterAdr,
   VirScr, DatAdr : Dword;
  { File variable used by LoadBMP. }
             plik : file;
  { i      : used by ShowWater as the row counter of its drawing pass.
    nx, ny : cell that the main loop disturbs on each frame.
    lic    : frame counter that drives the nx / ny path. }
   i, nx, ny, lic : word;
  { Argument passed to ShowWater; toggled between 0 and 1 every frame. }
               cp : byte;
  { Overlays the dword at address $46C. The main program treats it as a
    counter running at 18.2 ticks per second - presumably the BIOS timer
    tick count; confirm for the target DOS extender. }
            timer : dword absolute $0046C;
  { Receives the keystrokes consumed with ReadKey. }
               ch : char;

{ ------------------------------------------------------------ }
{ ---- Procedures & functions for demo engine start here ----- }
{ ------------------------------------------------------------ }

{ Routines linked in from fastmove.obj. Their bodies are not part of this
  file, so the notes below are derived from the call sites in this program
  and should be confirmed against the fastmove source. }
{$L fastmove.obj}

{ Called once as FillDWord( WaterAdr, 64000, $00 ) to clear WaterBuf.
  Presumably src is the start address, sizes a count of dwords and what the
  fill value - TODO confirm. }
procedure FillDWord( src, sizes, what : Dword ); external; -- fills memory with 4 bytes at once
{ Called with sizes = 80 for one 320-byte picture row and sizes = 16000 for a
  whole 64000-byte buffer, so sizes presumably counts dwords - TODO confirm. }
procedure MoveD( src, dest, sizes : Dword ); external;
{ Called with $13 at start-up and $03 before text output - presumably a BIOS
  video mode number; verify. }
procedure SetMode( tryb : word ); external;
{ Called with the address of a 64000-byte picture or screen buffer. }
procedure FlipScreen( adres : dword ); external; --- with wait for retrace

procedure LoadBMP( BMPName : string ); { loads a 320x200x256 BMP }
{ Reads the palette and the pixel rows of the file BMPName.

  BMPName : path of the picture file.

  Effects:
    - paleta receives 256 three-byte entries. Each comes from a four-byte
      entry of the file's colour table: the first three bytes are stored in
      reverse order and shifted right by 2.
    - DatBuf receives up to 64000 pixel bytes read from file offset 1078.
    - The 200 rows of DatBuf are copied in reverse row order to the buffer
      at VirScr (row 0 of the file becomes row 199).
  DatAdr and VirScr must already hold the addresses of the picture and
  screen buffers when this is called.

  If the file cannot be opened, or is too short to contain the header and
  colour table, the routine switches to mode $03, prints a message and halts
  with exit code 1.

  The pixel read is capped at 64000 bytes so that an oversized file cannot
  write past the end of DatBuf. }
const
  PaletteOfs   = 54;      { file offset of the colour table }
  PaletteBytes = 1024;    { 256 entries of 4 bytes }
  PixelOfs     = 1078;    { PaletteOfs + PaletteBytes }
  PixelBytes   = 64000;   { 320 * 200 }
var
  entry, row     : word;
  Fsize, toRead  : Dword;
  srcRow, dstRow : Dword;
begin
  assign( plik, BMPName );
  {$I-}
  reset( plik, 1 );
  {$I+}
  if IOResult <> 0
    then begin
           SetMode( $0003 );
           writeLn('No Pictures found.');
           halt( 1 );
         end;
  Fsize:=FileSize( plik );
  { Without this check Fsize-PixelOfs below would wrap to a huge count. }
  if Fsize < PixelOfs
    then begin
           close( plik );
           SetMode( $0003 );
           writeLn('Not a 320x200x256 BMP.');
           halt( 1 );
         end;
  seek( plik, PaletteOfs );
  BlockRead( plik, DatBuf, PaletteBytes );
  for entry:=0 to 255 do
    begin
      paleta[ entry*3+3 ]:=DatBuf[ entry*4+0 ] SHR 2;
      paleta[ entry*3+2 ]:=DatBuf[ entry*4+1 ] SHR 2;
      paleta[ entry*3+1 ]:=DatBuf[ entry*4+2 ] SHR 2;
    end;
  seek( plik, PixelOfs );
  toRead:=Fsize-PixelOfs;
  if toRead > PixelBytes
    then toRead:=PixelBytes;            { never read more than DatBuf holds }
  BlockRead( plik, DatBuf, toRead );
  close( plik );
  { Copy the rows upside down. The offsets are kept in Dword variables so the
    arithmetic does not depend on how a narrow loop variable is widened. }
  srcRow:=DatAdr;
  dstRow:=VirScr+PixelBytes;
  for row:=0 to 199 do
    begin
      dec( dstRow, 320 );
      moveD( srcRow, dstRow, 80 );
      inc( srcRow, 320 );
    end;
end;

procedure SetPal; assembler;
{ Sends the 768 bytes of paleta to the video hardware: writes 0 to port $3C8
  and then streams the whole table to port $3C9.

  ESI addresses paleta as a 32-bit offset, so the string instruction runs
  with 32-bit addressing and REP counts with the full ECX register. The
  count is therefore loaded into ECX (not just CX), and the direction flag
  is cleared so that ESI moves forward through the table.

  Overwrites AL, DX, ECX and ESI. }
asm
  cld                       { make OUTSB advance ESI upwards }
  xor  AL, AL
  mov  DX, $3C8
  out  DX, AL               { start at colour index 0 }
  mov  ESI, offset paleta
  mov  ECX, 768             { full 32-bit repeat count }
  mov  DX, $3C9
  rep  outsb
end;

procedure ShowWater( Cpg : byte ); assembler;
{ Advances the water height field by one step and redraws the buffer at
  VirScr from the picture at DatAdr.

  Cpg : only bit 0 is examined. When it is 0, neighbour values are read from
        the half of WaterBuf at byte offset 0 and results are written to the
        half at byte offset 128000; when it is 1 the two roles are swapped.

  Pass 1 ("calculate water"), rows 1..197, columns 1..318: the eight
  neighbours in the half that is read are summed and shifted right by 2, the
  cell's current value in the half that is written is subtracted, and that
  result minus itself shifted right by 4 is stored into the written half.

  Pass 2 ("paint water"), rows 198 down to 1, columns 1..318, working on the
  half just written: the difference to the lower neighbour, shifted right by
  3, is added to the column, and the difference to the right-hand neighbour,
  shifted right by 3, is added to the row. The picture byte at
  row*320+column from DatAdr is stored at the cell's own position in the
  buffer at VirScr.

  Reads the globals WaterAdr, DatAdr and VirScr. Uses the global word i as
  the row counter of pass 2 and leaves it at 0. AX, EBX, CX, DX, ESI and EDI
  are overwritten and not restored.
  NOTE(review): confirm that the compiler permits an assembler routine to
  change EBX, ESI and EDI. }
 ---------------------------------------------
 --- Fully optimized procedure!! 100% asm! ---
 ---------------------------------------------
var
     Cpage, Spage : dword;            -- offsets of buffers
 ------------------------------------------------------------------------
 ---  Cpage:=Cpg*64000;                                               ---
 ---  Spage:=( Cpg XOR $01 )*64000;                                   ---
 ---  for i:=1 to height-2 do  { height = height of screen }          ---
 ---    begin                                                         ---
 ---      wn:=( i SHL 6 )+( i SHL 8 ); { wn:=i*320; }                 ---
 ---      for j:=1 to width-2 do { width = width of screen }          ---
 ---        begin                                                     ---
 ---          inc( wn );                                              ---
 ---          NewWater:=(( WaterBuf[ Cpage+wn+1 ]+                    ---
 ---                       WaterBuf[ Cpage+wn+319 ]+                  ---
 ---                       WaterBuf[ Cpage+wn+320 ]+                  ---
 ---                       WaterBuf[ Cpage+wn+321 ]+                  ---
 ---                       WaterBuf[ Cpage+wn-319 ]+                  ---
 ---                       WaterBuf[ Cpage+wn-320 ]+                  ---
 ---                       WaterBuf[ Cpage+wn-321 ]+                  ---
 ---                       WaterBuf[ Cpage+wn-1 ] ) SHR 2 )-          ---
 ---                       WaterBuf[ Spage+wn ];                      ---
 ---          WaterBuf[ Spage+wn ]:=NewWater-( NewWater SHR density );---
 ---        end { try density from 4 to 8 }                           ---
 ---     end;                                                         ---
 ------------------------------------------------------------------------
 ------------- Pascal version of "calculate water" is above -------------
 ------------------- Asm optimized version is below ---------------------
 ------------------------------------------------------------------------
 asm
            mov  AL, [Cpg]
            and  AL, $01                 -- which page ?
             jz  @setp1
            mov  [Cpage], 128000        -- Cpage set as the second page
            mov  [Spage], 0
            jmp  @setp2
   @setp1:
            mov  [Cpage], 0             -- Cpage set as the first page
            mov  [Spage], 128000
   @setp2:
            mov  DX, 1                  -- Y counter
            { The xor has no effect: EBX is loaded by the next instruction. }
            xor  EBX, EBX
            mov  EBX, 640               -- EBX:=( Y*320 )*2
            mov  EDI, [WaterAdr]
            add  EDI, EBX               -- "WaterAdr" = ofs of our buffers
            mov  ESI, EDI               -- to this point EDI = ESI
            add  EDI, [Cpage]
            add  ESI, [Spage]           -- here is the difference
            mov  CX, 318                -- X counter: screen size-2
   @1stlp:                              -- main loop is here !!!
            add  ESI, 2
            add  EDI, 2                 -- use integers so all *2
            mov  AX, [EDI-2]
            add  AX, [EDI-638]
            add  AX, [EDI-640]
            add  AX, [EDI-642]
            add  AX, [EDI+2]
            add  AX, [EDI+638]
            add  AX, [EDI+640]
            add  AX, [EDI+642] -- eight pixel method. Slower but looks better
            sar  AX, 2
            mov  BX, [ESI]
            sub  AX, BX
            mov  BX, AX
            sar  BX, 4                 -- density of water
            sub  AX, BX
            mov  [ESI], AX             -- done. move value to dest. buffer
            dec  CX
            jnz  @1stlp                -- end of "little" loop
            { Skip column 319 of this row and column 0 of the next one. }
            add  ESI, 4                -- add 4 up to 320 (*2 of course)
            add  EDI, 4                -- use integers so all *2
            mov  CX, 318               -- restore X counter
            inc  DX
            { NOTE(review): the loop ends once DX reaches 198, so row 198 is
              never recalculated here, although the Pascal reference above
              runs to height-2 and the drawing pass below starts at row 198. }
            cmp  DX, 198
             jb  @1stlp                -- end of "calculate water"
 -------------------------------------------------------------------
 --- for i:=1 to height-2 do                                     ---
 ---   begin                                                     ---
 ---     wn:=i*320;                                              ---
 ---     for j:=1 to width-2 do                                  ---
 ---       begin                                                 ---
 ---         inc( wn );                                          ---
 ---         dx:=WaterBuf[ Spage+wn ]-WaterBuf[ Spage+wn+320 ];  ---
 ---         dy:=WaterBuf[ Spage+wn ]-WaterBuf[ Spage+wn+1 ];    ---
 ---         wsx:=( dx SHR 3 ) + j;                              ---
 ---         wsy:=( dy SHR 3 ) + i;                              ---
 ---         bajt:=mem[ DatAdr+wsx+wsy*320 ]; {buffer with image}---
 ---         mem[ VirScr+j+i*320 ]:=bajt; {into screen buffer}   ---
 ---       end                                                   ---
 ---   end;                                                      ---
 -------------------------------------------------------------------
 -------------------------------------------------------------------
 ------------ Pascal version of "paint water" is above -------------
 ----------------- Asm optimized version is below ------------------
 -------------------------------------------------------------------
            { The row counter lives in the global variable i, not in a
              register or a local. }
            mov  i, 198            -- i = counter. Y size of screen: 200-2
            mov  EDI, [WaterAdr]
            add  EDI, [Spage]
            mov  ESI, [VirScr]
            mov  EBX, 63360        -- ( Y size of screen-2 )*320
            add  ESI, EBX          -- ESI:=Ofs( mem[ VirScr+i*320 ] )
            shl  EBX, 1
            add  EDI, EBX          -- EDI:=Ofs( WaterBuf[ Spage+i*320 ] )
            xor  CX, CX
    @lloop:                        -- main loop starts here !!!
            inc  CX
            { Clears the upper half of EBX so the 16-bit offset built in BX
              below can be added to DatAdr as a 32-bit value. }
            xor  EBX, EBX
            inc  EDI
            inc  EDI               -- use integers, so all *2
            mov  BX, [EDI]         -- BX:=WaterBuf[ Spage+i*320+j ]
            { The value is loaded into AX; the "DX" in the notes on the next
              two lines refers to this AX. }
            mov  AX, [EDI+2]       -- DX:=WaterBuf[ Spage+i*320+j+1 ]
            sub  BX, AX            -- BX:=BX-DX   (dy)
            mov  DX, [EDI]         -- DX:=WaterBuf[ Spage+i*320+j ]
            mov  AX, [EDI+640]     -- AX:=WaterBuf[ Spage+i*320+j+320 ]
            sub  DX, AX            -- DX:=DX-AX   (dx)
            sar  DX, 3
            add  DX, CX            -- DX: ofsx = ( wdx SHR 3 ) + CX(X)
            sar  BX, 3
            add  BX, [i]           -- BX: ofsy = ( wdy SHR 3 ) + i (Y)
            { ofsy*320 is formed as ofsy*64 + ofsy*256, all in 16 bits.
              NOTE(review): neither ofsx nor ofsy is clamped to the picture,
              and a negative or large ofsy wraps modulo 65536, so the byte
              fetched below can lie outside the 320x200 picture area. }
            shl  BX, 6
            mov  AX, BX
            shl  BX, 2
            add  BX, AX            -- BX:= ofsy*320
            add  BX, DX            -- BX = ofsx+ofsy*320
            add  EBX, [DatAdr]
            mov  AL, [EBX]         -- AL:=mem[ DatAdr+BX ]
            inc  ESI
            mov  [ESI], AL         -- mem[ VirScr+i*320+j ]:=AL
            cmp  CX, 318           -- X size of screen: 320-2
             jb  @lloop
            xor  CX, CX
            { 638 = 318 steps taken in this row + 320 to move up one row;
              1276 is the same figure for the two-byte water cells. }
            sub  ESI, 638          -- sub X screen size and all "inc ESI" in lloop
            sub  EDI, 1276         -- sub 2*X screen size and all "inc EDI"*2 in lloop
            dec  i
            { Masks i with $FF; the zero flag it produces is what the jnz
              below tests. }
            and  [i], $FF
            jnz  @lloop            -- end of "paint water"
end; { of procedure "ShowWater" }

{ Main program: loads sea.bmp, then runs the water effect until a key is
  pressed and prints the achieved frame rate.

  Each frame disturbs one cell of the height field. The cell moves along a
  cosine/sine path driven by lic. ShowWater is then called with cp, which
  alternates between 0 and 1, and the result is shown with FlipScreen.

  The elapsed time is measured in ticks of the counter behind "timer" and
  converted with the factor 18.2 ticks per second. Two corrections keep the
  final division well defined:
    - a counter value smaller than the starting value is treated as one
      restart of the counter at midnight ($1800B0 ticks per day);
    - an elapsed time of zero ticks is counted as one tick, so a key hit
      within the first tick does not cause a division by zero. }
BEGIN
  { Addresses of the three work buffers, as used by the assembly routines. }
  VirScr:=Ofs( BufScr );
  DatAdr:=Ofs( DatBuf );
  WaterAdr:=Ofs( WaterBuf );
  FillDWord( WaterAdr, 64000, $00 ); { clear all buffers }
  SetMode( $13 );
  LoadBMP( 'sea.bmp' );
  SetPal;
  { LoadBMP left the row-reversed picture in the screen buffer; copy it back
    so DatBuf holds the picture that ShowWater samples from. }
  moveD( VirScr, DatAdr, 16000 ); { VirScr is our screen buffer }
  FlipScreen( DatAdr );
  cp:=0;  lic:=0;  frame:=0;  t1:=timer;
  repeat
    inc( lic );
    nx:=round( cos( lic/40 )*110+160 );
    ny:=round( sin( lic/20 )*80+100 );
    WaterBuf[ nx+( ny SHL 8 )+( ny SHL 6 ) ]:=500;   { cell nx + ny*320 }
    ShowWater( cp );
    FlipScreen( VirScr );
    inc( frame );
    cp:=cp XOR $01;
  until KeyPressed;
  t2:=timer;
  if t2 < t1
    then t2:=t2+$1800B0;     { counter restarted at midnight during the run }
  t2:=t2-t1;
  if t2 = 0
    then t2:=1;              { finished within one tick: avoid dividing by 0 }
  ch:=ReadKey;
  SetMode( $03 );
  writeLn((( frame*18.2 )/t2 ):6:2,' frames per second.');
  repeat until KeyPressed;  ch:=ReadKey;
END.