You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
RnQ/for.RnQ/Graphics32/GR32_Math.pas

843 lines
21 KiB
Plaintext

unit GR32_Math;
(* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1 or LGPL 2.1 with linking exception
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* Alternatively, the contents of this file may be used under the terms of the
* Free Pascal modified version of the GNU Lesser General Public License
* Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
* of this license are applicable instead of those above.
* Please see the file LICENSE.txt for additional information concerning this
* license.
*
* The Original Code is Additional Math Routines for Graphics32
*
* The Initial Developer of the Original Code is
* Mattias Andersson
* (parts of this unit were moved here from GR32_System.pas and GR32.pas by Alex A. Denisov)
*
* Portions created by the Initial Developer are Copyright (C) 2005-2009
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Michael Hansen
*
* ***** END LICENSE BLOCK ***** *)
interface
{$I GR32.inc}
uses GR32;
{ Fixed point math routines }
function FixedFloor(A: TFixed): Integer;
function FixedCeil(A: TFixed): Integer;
function FixedMul(A, B: TFixed): TFixed;
function FixedDiv(A, B: TFixed): TFixed;
function OneOver(Value: TFixed): TFixed;
function FixedRound(A: TFixed): Integer;
function FixedSqr(Value: TFixed): TFixed;
function FixedSqrtLP(Value: TFixed): TFixed; // 8-bit precision
function FixedSqrtHP(Value: TFixed): TFixed; // 16-bit precision
// Fixed point interpolation
function FixedCombine(W, X, Y: TFixed): TFixed;
{ Trigonometric routines }
procedure SinCos(const Theta: TFloat; out Sin, Cos: TFloat); overload;
procedure SinCos(const Theta, Radius: Single; out Sin, Cos: Single); overload;
function Hypot(const X, Y: TFloat): TFloat; overload;
function Hypot(const X, Y: Integer): Integer; overload;
function FastSqrt(const Value: TFloat): TFloat;
function FastSqrtBab1(const Value: TFloat): TFloat;
function FastSqrtBab2(const Value: TFloat): TFloat;
function FastInvSqrt(const Value: Single): Single; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF} overload;
{ Misc. Routines }
{ MulDiv a faster implementation of Windows.MulDiv funtion }
function MulDiv(Multiplicand, Multiplier, Divisor: Integer): Integer;
// tells if X is a power of 2, returns true when X = 1,2,4,8,16 etc.
function IsPowerOf2(Value: Integer): Boolean; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF}
// returns X rounded down to the nearest power of two
function PrevPowerOf2(Value: Integer): Integer;
// returns X rounded down to the nearest power of two, i.e. 5 -> 8, 7 -> 8, 15 -> 16
function NextPowerOf2(Value: Integer): Integer;
// fast average without overflow, useful for e.g. fixed point math
function Average(A, B: Integer): Integer;
// fast sign function
function Sign(Value: Integer): Integer;
function FloatMod(x, y: Double): Double; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF}
implementation
uses
Math;
{$IFDEF PUREPASCAL}
const
FixedOneS: Single = 65536;
{$ENDIF}
{ Fixed-point math }
function FixedFloor(A: TFixed): Integer;
{$IFDEF PUREPASCAL}
begin
Result := A div FIXEDONE;
{$ELSE}
asm
{$IFDEF TARGET_x86}
SAR EAX, 16
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, ECX
SAR EAX, 16
{$ENDIF}
{$ENDIF}
end;
function FixedCeil(A: TFixed): Integer;
{$IFDEF PUREPASCAL}
begin
Result := (A + $FFFF) div FIXEDONE;
{$ELSE}
asm
{$IFDEF TARGET_x86}
ADD EAX, $0000FFFF
SAR EAX, 16
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, ECX
ADD EAX, $0000FFFF
SAR EAX, 16
{$ENDIF}
{$ENDIF}
end;
function FixedRound(A: TFixed): Integer;
{$IFDEF PUREPASCAL}
begin
Result := (A + $7FFF) div FIXEDONE;
{$ELSE}
asm
{$IFDEF TARGET_x86}
ADD EAX, $00007FFF
SAR EAX, 16
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, ECX
ADD EAX, $00007FFF
SAR EAX, 16
{$ENDIF}
{$ENDIF}
end;
function FixedMul(A, B: TFixed): TFixed;
{$IFDEF PUREPASCAL}
begin
Result := Round(A * FixedToFloat * B);
{$ELSE}
asm
{$IFDEF TARGET_x86}
IMUL EDX
SHRD EAX, EDX, 16
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, ECX
IMUL EDX
SHRD EAX, EDX, 16
{$ENDIF}
{$ENDIF}
end;
function FixedDiv(A, B: TFixed): TFixed;
{$IFDEF PUREPASCAL}
begin
Result := Round(A / B * FixedOne);
{$ELSE}
asm
{$IFDEF TARGET_x86}
MOV ECX, B
CDQ
SHLD EDX, EAX, 16
SHL EAX, 16
IDIV ECX
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, ECX
MOV ECX, EDX
CDQ
SHLD EDX, EAX, 16
SHL EAX, 16
IDIV ECX
{$ENDIF}
{$ENDIF}
end;
function OneOver(Value: TFixed): TFixed;
{$IFDEF PUREPASCAL}
const
Dividend: Single = 4294967296; // FixedOne * FixedOne
begin
Result := Round(Dividend / Value);
{$ELSE}
asm
{$IFDEF TARGET_x86}
MOV ECX, Value
XOR EAX, EAX
MOV EDX, 1
IDIV ECX
{$ENDIF}
{$IFDEF TARGET_x64}
XOR EAX, EAX
MOV EDX, 1
IDIV ECX
{$ENDIF}
{$ENDIF}
end;
function FixedSqr(Value: TFixed): TFixed;
{$IFDEF PUREPASCAL}
begin
Result := Round(Value * FixedToFloat * Value);
{$ELSE}
asm
{$IFDEF TARGET_x86}
IMUL EAX
SHRD EAX, EDX, 16
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, Value
IMUL EAX
SHRD EAX, EDX, 16
{$ENDIF}
{$ENDIF}
end;
function FixedSqrtLP(Value: TFixed): TFixed;
{$IFDEF PUREPASCAL}
begin
Result := Round(Sqrt(Value * FixedOneS));
{$ELSE}
asm
{$IFDEF TARGET_x86}
PUSH EBX
MOV ECX, EAX
XOR EAX, EAX
MOV EBX, $40000000
@SqrtLP1:
MOV EDX, ECX
SUB EDX, EBX
JL @SqrtLP2
SUB EDX, EAX
JL @SqrtLP2
MOV ECX,EDX
SHR EAX, 1
OR EAX, EBX
SHR EBX, 2
JNZ @SqrtLP1
SHL EAX, 8
JMP @SqrtLP3
@SqrtLP2:
SHR EAX, 1
SHR EBX, 2
JNZ @SqrtLP1
SHL EAX, 8
@SqrtLP3:
POP EBX
{$ENDIF}
{$IFDEF TARGET_x64}
PUSH RBX
XOR EAX, EAX
MOV EBX, $40000000
@SqrtLP1:
MOV EDX, ECX
SUB EDX, EBX
JL @SqrtLP2
SUB EDX, EAX
JL @SqrtLP2
MOV ECX,EDX
SHR EAX, 1
OR EAX, EBX
SHR EBX, 2
JNZ @SqrtLP1
SHL EAX, 8
JMP @SqrtLP3
@SqrtLP2:
SHR EAX, 1
SHR EBX, 2
JNZ @SqrtLP1
SHL EAX, 8
@SqrtLP3:
POP RBX
{$ENDIF}
{$ENDIF}
end;
function FixedSqrtHP(Value: TFixed): TFixed;
{$IFDEF PUREPASCAL}
begin
Result := Round(Sqrt(Value * FixedOneS));
{$ELSE}
asm
{$IFDEF TARGET_x86}
PUSH EBX
MOV ECX, EAX
XOR EAX, EAX
MOV EBX, $40000000
@SqrtHP1:
MOV EDX, ECX
SUB EDX, EBX
jb @SqrtHP2
SUB EDX, EAX
jb @SqrtHP2
MOV ECX,EDX
SHR EAX, 1
OR EAX, EBX
SHR EBX, 2
JNZ @SqrtHP1
JZ @SqrtHP5
@SqrtHP2:
SHR EAX, 1
SHR EBX, 2
JNZ @SqrtHP1
@SqrtHP5:
MOV EBX, $00004000
SHL EAX, 16
SHL ECX, 16
@SqrtHP3:
MOV EDX, ECX
SUB EDX, EBX
jb @SqrtHP4
SUB EDX, EAX
jb @SqrtHP4
MOV ECX, EDX
SHR EAX, 1
OR EAX, EBX
SHR EBX, 2
JNZ @SqrtHP3
JMP @SqrtHP6
@SqrtHP4:
SHR EAX, 1
SHR EBX, 2
JNZ @SqrtHP3
@SqrtHP6:
POP EBX
{$ENDIF}
{$IFDEF TARGET_x64}
PUSH RBX
XOR EAX, EAX
MOV EBX, $40000000
@SqrtHP1:
MOV EDX, ECX
SUB EDX, EBX
jb @SqrtHP2
SUB EDX, EAX
jb @SqrtHP2
MOV ECX,EDX
SHR EAX, 1
OR EAX, EBX
SHR EBX, 2
JNZ @SqrtHP1
JZ @SqrtHP5
@SqrtHP2:
SHR EAX, 1
SHR EBX, 2
JNZ @SqrtHP1
@SqrtHP5:
MOV EBX, $00004000
SHL EAX, 16
SHL ECX, 16
@SqrtHP3:
MOV EDX, ECX
SUB EDX, EBX
jb @SqrtHP4
SUB EDX, EAX
jb @SqrtHP4
MOV ECX, EDX
SHR EAX, 1
OR EAX, EBX
SHR EBX, 2
JNZ @SqrtHP3
JMP @SqrtHP6
@SqrtHP4:
SHR EAX, 1
SHR EBX, 2
JNZ @SqrtHP3
@SqrtHP6:
POP RBX
{$ENDIF}
{$ENDIF}
end;
function FixedCombine(W, X, Y: TFixed): TFixed;
// EAX <- W, EDX <- X, ECX <- Y
// combine fixed value X and fixed value Y with the weight of X given in W
// Result Z = W * X + (1 - W) * Y = Y + (X - Y) * W
// Fixed Point Version: Result Z = Y + (X - Y) * W / 65536
{$IFDEF PUREPASCAL}
begin
Result := Round(Y + (X - Y) * FixedToFloat * W);
{$ELSE}
asm
{$IFDEF TARGET_x86}
SUB EDX, ECX
IMUL EDX
SHRD EAX, EDX, 16
ADD EAX, ECX
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, ECX
SUB EDX, R8D
IMUL EDX
SHRD EAX, EDX, 16
ADD EAX, R8D
{$ENDIF}
{$ENDIF}
end;
{ Trigonometry }
procedure SinCos(const Theta: TFloat; out Sin, Cos: TFloat);
{$IFDEF NATIVE_SINCOS}
var
S, C: Extended;
begin
Math.SinCos(Theta, S, C);
Sin := S;
Cos := C;
{$ELSE}
{$IFDEF TARGET_x64}
var
Temp: DWord = 0;
{$ENDIF}
asm
{$IFDEF TARGET_x86}
FLD Theta
FSINCOS
FSTP DWORD PTR [EDX] // cosine
FSTP DWORD PTR [EAX] // sine
{$ENDIF}
{$IFDEF TARGET_x64}
MOVD Temp, Theta
FLD Temp
FSINCOS
FSTP [Sin] // cosine
FSTP [Cos] // sine
{$ENDIF}
{$ENDIF}
end;
procedure SinCos(const Theta, Radius: TFloat; out Sin, Cos: TFloat);
{$IFDEF NATIVE_SINCOS}
var
S, C: Extended;
begin
Math.SinCos(Theta, S, C);
Sin := S * Radius;
Cos := C * Radius;
{$ELSE}
asm
{$IFDEF TARGET_x86}
FLD Theta
FSINCOS
FMUL Radius
FSTP DWORD PTR [EDX] // cosine
FMUL Radius
FSTP DWORD PTR [EAX] // sine
{$ENDIF}
{$IFDEF TARGET_x64}
MOVD Temp, Theta
FLD Temp
MOVD Temp, Radius
FSINCOS
FMUL Temp
FSTP [Cos]
FMUL Temp
FSTP [Sin]
{$ENDIF}
{$ENDIF}
end;
function Hypot(const X, Y: TFloat): TFloat;
{$IFDEF PUREPASCAL}
begin
Result := Sqrt(Sqr(X) + Sqr(Y));
{$ELSE}
asm
{$IFDEF TARGET_x86}
FLD X
FMUL ST,ST
FLD Y
FMUL ST,ST
FADDP ST(1),ST
FSQRT
FWAIT
{$ENDIF}
{$IFDEF TARGET_x64}
MULSS XMM0, XMM0
MULSS XMM1, XMM1
ADDSS XMM0, XMM1
SQRTSS XMM0, XMM0
{$ENDIF}
{$ENDIF}
end;
function Hypot(const X, Y: Integer): Integer;
//{$IFDEF PUREPASCAL}
begin
Result := Round(Math.Hypot(X, Y));
(*
{$ELSE}
asm
{$IFDEF TARGET_x64}
IMUL RAX, RCX, RDX
{$ELSE}
FLD X
FMUL ST,ST
FLD Y
FMUL ST,ST
FADDP ST(1),ST
FSQRT
FISTP [ESP - 4]
MOV EAX, [ESP - 4]
FWAIT
{$ENDIF}
{$ENDIF}
*)
end;
function FastSqrt(const Value: TFloat): TFloat;
// see http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Approximations_that_depend_on_IEEE_representation
{$IFDEF PUREPASCAL}
var
I: Integer absolute Value;
J: Integer absolute Result;
begin
J := (I - $3F800000) div 2 + $3F800000;
{$ELSE}
asm
{$IFDEF TARGET_x86}
MOV EAX, DWORD PTR Value
SUB EAX, $3F800000
SAR EAX, 1
ADD EAX, $3F800000
MOV DWORD PTR [ESP - 4], EAX
FLD DWORD PTR [ESP - 4]
{$ENDIF}
{$IFDEF TARGET_x64}
SQRTSS XMM0, XMM0
{$ENDIF}
{$ENDIF}
end;
function FastSqrtBab1(const Value: TFloat): TFloat;
// see http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Approximations_that_depend_on_IEEE_representation
// additionally one babylonian step added
const
CHalf : TFloat = 0.5;
{$IFDEF PUREPASCAL}
var
I: Integer absolute Value;
J: Integer absolute Result;
begin
J := (I - $3F800000) div 2 + $3F800000;
Result := CHalf * (Result + Value / Result);
{$ELSE}
asm
{$IFDEF TARGET_x86}
MOV EAX, Value
SUB EAX, $3F800000
SAR EAX, 1
ADD EAX, $3F800000
MOV DWORD PTR [ESP - 4], EAX
FLD Value
FDIV DWORD PTR [ESP - 4]
FADD DWORD PTR [ESP - 4]
FMUL CHalf
{$ENDIF}
{$IFDEF TARGET_x64}
SQRTSS XMM0, XMM0
{$ENDIF}
{$ENDIF}
end;
function FastSqrtBab2(const Value: TFloat): TFloat;
// see http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Approximations_that_depend_on_IEEE_representation
// additionally two babylonian steps added
{$IFDEF PUREPASCAL}
const
CQuarter : TFloat = 0.25;
var
J: Integer absolute Result;
begin
Result := Value;
J := ((J - (1 shl 23)) shr 1) + (1 shl 29);
Result := Result + Value / Result;
Result := CQuarter * Result + Value / Result;
{$ELSE}
const
CHalf : TFloat = 0.5;
asm
{$IFDEF TARGET_x86}
MOV EAX, Value
SUB EAX, $3F800000
SAR EAX, 1
ADD EAX, $3F800000
MOV DWORD PTR [ESP - 4], EAX
FLD Value
FDIV DWORD PTR [ESP - 4]
FADD DWORD PTR [ESP - 4]
FMUL CHalf
{$ENDIF}
{$IFDEF TARGET_x64}
MOVD EAX, Value
SUB EAX, $3F800000
SAR EAX, 1
ADD EAX, $3F800000
MOVD XMM1, EAX
DIVSS XMM0, XMM1
ADDSS XMM0, XMM1
MOVD XMM1, CHalf
MULSS XMM0, XMM1
{$ENDIF}
{$ENDIF}
end;
function FastInvSqrt(const Value: Single): Single;
var
IntCst : Cardinal absolute result;
begin
Result := Value;
IntCst := ($BE6EB50C - IntCst) shr 1;
Result := 0.5 * Result * (3 - Value * Sqr(Result));
end;
{ Misc. }
function MulDiv(Multiplicand, Multiplier, Divisor: Integer): Integer;
{$IFDEF PUREPASCAL}
begin
Result := Int64(Multiplicand) * Int64(Multiplier) div Divisor;
{$ELSE}
asm
{$IFDEF TARGET_x86}
PUSH EBX // Imperative save
PUSH ESI // of EBX and ESI
MOV EBX, EAX // Result will be negative or positive so set rounding direction
XOR EBX, EDX // Negative: substract 1 in case of rounding
XOR EBX, ECX // Positive: add 1
OR EAX, EAX // Make all operands positive, ready for unsigned operations
JNS @m1Ok // minimizing branching
NEG EAX
@m1Ok:
OR EDX, EDX
JNS @m2Ok
NEG EDX
@m2Ok:
OR ECX, ECX
JNS @DivOk
NEG ECX
@DivOK:
MUL EDX // Unsigned multiply (Multiplicand*Multiplier)
MOV ESI, EDX // Check for overflow, by comparing
SHL ESI, 1 // 2 times the high-order 32 bits of the product (EDX)
CMP ESI, ECX // with the Divisor.
JAE @Overfl // If equal or greater than overflow with division anticipated
DIV ECX // Unsigned divide of product by Divisor
SUB ECX, EDX // Check if the result must be adjusted by adding or substracting
CMP ECX, EDX // 1 (*.5 -> nearest integer), by comparing the difference of
JA @NoAdd // Divisor and remainder with the remainder. If it is greater then
INC EAX // no rounding needed; add 1 to result otherwise
@NoAdd:
OR EBX, EDX // From unsigned operations back the to original sign of the result
JNS @Exit // must be positive
NEG EAX // must be negative
JMP @Exit
@Overfl:
OR EAX, -1 // 3 bytes alternative for MOV EAX,-1. Windows.MulDiv "overflow"
// and "zero-divide" return value
@Exit:
POP ESI // Restore
POP EBX // esi and EBX
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, ECX // Result will be negative or positive so set rounding direction
XOR ECX, EDX // Negative: substract 1 in case of rounding
XOR ECX, R8D // Positive: add 1
OR EAX, EAX // Make all operands positive, ready for unsigned operations
JNS @m1Ok // minimizing branching
NEG EAX
@m1Ok:
OR EDX, EDX
JNS @m2Ok
NEG EDX
@m2Ok:
OR R8D, R8D
JNS @DivOk
NEG R8D
@DivOK:
MUL EDX // Unsigned multiply (Multiplicand*Multiplier)
MOV R9D, EDX // Check for overflow, by comparing
SHL R9D, 1 // 2 times the high-order 32 bits of the product (EDX)
CMP R9D, R8D // with the Divisor.
JAE @Overfl // If equal or greater than overflow with division anticipated
DIV R8D // Unsigned divide of product by Divisor
SUB R8D, EDX // Check if the result must be adjusted by adding or substracting
CMP R8D, EDX // 1 (*.5 -> nearest integer), by comparing the difference of
JA @NoAdd // Divisor and remainder with the remainder. If it is greater then
INC EAX // no rounding needed; add 1 to result otherwise
@NoAdd:
OR ECX, EDX // From unsigned operations back the to original sign of the result
JNS @Exit // must be positive
NEG EAX // must be negative
JMP @Exit
@Overfl:
OR EAX, -1 // 3 bytes alternative for MOV EAX,-1. Windows.MulDiv "overflow"
// and "zero-divide" return value
@Exit:
{$ENDIF}
{$ENDIF}
end;
function IsPowerOf2(Value: Integer): Boolean;
//returns true when X = 1,2,4,8,16 etc.
begin
Result := Value and (Value - 1) = 0;
end;
function PrevPowerOf2(Value: Integer): Integer;
//returns X rounded down to the power of two
{$IFDEF PUREPASCAL}
begin
Result := 1;
while Value shr 1 > 0 do
Result := Result shl 1;
{$ELSE}
asm
{$IFDEF TARGET_x86}
BSR ECX, EAX
SHR EAX, CL
SHL EAX, CL
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, Value
BSR ECX, EAX
SHR EAX, CL
SHL EAX, CL
{$ENDIF}
{$ENDIF}
end;
function NextPowerOf2(Value: Integer): Integer;
//returns X rounded up to the power of two, i.e. 5 -> 8, 7 -> 8, 15 -> 16
{$IFDEF PUREPASCAL}
begin
Result := 2;
while Value shr 1 > 0 do
Result := Result shl 1;
{$ELSE}
asm
{$IFDEF TARGET_x86}
DEC EAX
JLE @1
BSR ECX, EAX
MOV EAX, 2
SHL EAX, CL
RET
@1:
MOV EAX, 1
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, Value
DEC EAX
JLE @1
BSR ECX, EAX
MOV EAX, 2
SHL EAX, CL
RET
@1:
MOV EAX, 1
{$ENDIF}
{$ENDIF}
end;
function Average(A, B: Integer): Integer;
//fast average without overflow, useful e.g. for fixed point math
//(A + B)/2 = (A and B) + (A xor B)/2
{$IFDEF PUREPASCAL}
begin
Result := (A and B) + (A xor B) div 2;
{$ELSE}
asm
{$IFDEF TARGET_x86}
MOV ECX, EDX
XOR EDX, EAX
SAR EDX, 1
AND EAX, ECX
ADD EAX, EDX
{$ENDIF}
{$IFDEF TARGET_x64}
MOV EAX, A
MOV ECX, EDX
XOR EDX, EAX
SAR EDX, 1
AND EAX, ECX
ADD EAX, EDX
{$ENDIF}
{$ENDIF}
end;
function Sign(Value: Integer): Integer;
{$IFDEF PUREPASCAL}
begin
//Assumes 32 bit integer
Result := (- Value) shr 31 - (Value shr 31);
{$ELSE}
asm
{$IFDEF TARGET_x64}
MOV EAX, Value
{$ENDIF}
CDQ
NEG EAX
ADC EDX, EDX
MOV EAX, EDX
{$ENDIF}
end;
function FloatMod(x, y: Double): Double;
begin
if (y = 0) then
Result := X
else
Result := x - y * Floor(x / y);
end;
end.