;=======================================================================
; Register
usage
;=======================================================================
.DEF PLO
= r0 ; product low
.DEF PHI
= r1 ; product high
.DEF BITP
= r2 ; bit pattern 0b1111_1000 for MOD_DIV10
.DEF CARRY
= r3 ; carry
(binary), used for MUL, MOD_DIV10
.DEF C205
= r4 ; constant 205 for MOD_DIV10
.DEF C100
= r5 ; constant 100
.DEF ZERO
= r6 ; constant 0
.DEF TRUE
= r7 ; constant 1
; NUM0..NUM3 can be the binary
result of PRODUKT or any other
; 24-bit number to
be multiplied or to be converted to decimal
.DEF NUM0
= r8 ; number LSB
.DEF NUM1
= r9 ; number
.DEF NUM2
= r10 ; number MSB
.DEF LEADZ
= r11 ; mark leading zeros (when printing decimal digits supressing leading zeros)
.DEF DEZ0
= r12 ; decimal digit 0
.DEF DEZ1
= r13 ; decimal digit 1
.DEF DEZ2
= r14 ; decimal digit 2
.DEF DEZ3
= r15 ; decimal digit 3
.DEF AKKU
= r16 ; accumulator
.DEF AKKU2
= r17 ; dito
.DEF AKKU3
= r18 ; dito
.DEF DTMP
= r19 ; delay timer, temporary constant 10 (within factorial)
.DEF STELLENL = r20 ; LSB of number of result digits (stellen)
.DEF STELLENH = r21 ; MSB of number of result digits
.DEF CARRYL
= r22 ; LSB of CARRY
.DEF CARRYH
= r23 ; MSB of CARRY
.DEF CNTL
= r24 ; 16-bit counter L
.DEF CNTH
= r25 ; 16-bit counter H
; registers 26..31 = XL/XH,
YL/YH and ZL/ZH, used as pointers and counters
; XH:XL holds the current FACT
value,
; YH:YL is the FACT loop counter,
; ZH:ZL is the pointer to FArray
;=======================================================================
; Subroutines
;=======================================================================
factorial: ; calculate factorial FACT! in
the huge array FArray (using almost all available
SRAM),
; which includes the result terminated by EOT (0xff)
ldi zh,HIGH(EndOfText) ; EOT location (start of AVR SRAM)
ldi zl,LOW(EndOfText)
ldi AKKU, $ff ; EOT marker
st z+,
AKKU ;
ZH:ZL now pointing to 1st entry of FArray
mov STELLENL,
TRUE ; STELLEN := 1 (even if no entry, important for
;
increasing FACT counter)
clr STELLENH
ldi DTMP, 10 ; temporary constant 10 (within function factorial)
ldi xl, 1 ; start calculation with FACT := 1
clr xh
st z, TRUE
ldi yh,HIGH(FACT-1)
; decrementing loop counter for
factorial calculation
ldi yl,LOW(FACT-1)
; now calculate factorial 1 * 2
* ... FACT
; for x := 2 to FACT do ; this version calculates
factorial incrementally
;
(which is faster than decrementing)
adiw XH:XL,1 ; FACT := FACT + 1 (which is 2)
REPEAT
; the 16-bit FOR loop is handled by
REPEAT-UNTIL using
; decrementing YH:YL, just FACT = XH:XL is incrementing
ldi zh,HIGH(FArray) ; Z register is an increasing pointer to FArray
ldi zl,LOW(FArray)
mov CNTL,STELLENL ; decrementing loop counter for 2nd FOR loop (STELLEN)
mov CNTH,STELLENH
clr CARRYL ; the 16-bit factorial CARRYH:CARRYL belongs to
;
NUM2:NUM1:NUM0 (is different from the 8-bit MOD_DIV10 CARRY)
clr CARRYH
; Pascal code:
; carry := 0;
; for j := 1 to stellen do // Index starting from 1
; begin
; produkt := FakArray[j] * i + carry; // i = current FACT value
; FakArray[j] := produkt
mod 10;
; carry := produkt div 10;
; end;
REPEAT ; multiply decimal digit from FArray[ZH:ZL] by XH:XL
;
(current 16 bit FACT value), result in NUM2:NUM1:NUM0
clr NUM2
ld AKKU,z ; take the next SINGLE decimal digit from FArray[ZH:ZL]
mul xl,AKKU ; multiply by FACTL
movw NUM1:NUM0,
PHI:PLO
mul xh,AKKU ; AKKU still is the same decimal digit,
; now
multiplied by FACTH
add NUM1, PLO
adc NUM2, PHI ; binary result is not more than 20 bits
add NUM0, CARRYL
adc NUM1, CARRYH
adc NUM2, ZERO
rcall Bin16Dez ; instead of DIV10/MOD10, convert 16 bit binary NUM1..NUM0
; to 4
decimal digits DEZ3..DEZ0, no AKKUs are saved
st z+,
DEZ0 ; store DEZ0 (= PRODUKT MOD 10) in FArray
and increment
;
ZH:ZL for next entry, new binary CARRYH:CARRYL
;
will become 100*DEZ3 + 10*DEZ2 + DEZ1
mul C100,
DEZ3 ; 100 * DEZ3
movw
CARRYH:CARRYL,PHI:PLO
mul DTMP,
DEZ2 ; 10 * DEZ2 (DTMP is temporary = 10)
add CARRYL, PLO
adc CARRYH, PHI
add CARRYL, DEZ1
adc CARRYH,
ZERO ; now CARRYH:CARRYL is binary value of
;
PRODUKT DIV 10 = DEZ3:DEZ2:DEZ1
sbiw CNTH:CNTL,
1 ; next FOR (STELLEN)
UNTIL
Z
; handle the decimal carry and update STELLEN
...
; Pascal code:
; while carry
<> 0 do
; begin
; stellen := stellen
+ 1; // erst
+1, dann Übertrag speichern
; FakArray[stellen] := carry mod 10;
// Übertrag in die nächste
Stelle
; carry := carry div 10;
; end;
; now store decimal digits
DEZ3:DEZ2:DEZ1 (equivalent to CARRYH:CARRY) as additional
; entries in FArray[ZH:ZL] and
update STELLEN ... (this method is much faster than
; any
binary MOD_DIV10 due to availability of decimal digits)
; for
factorials above 960! register DEZ4 is required (or use register CARRY instead)
; ZH:ZL points to next free
entry of FArray
IF
DEZ3 <> #0 ; check decimal digits top down ...
std z+2, DEZ3
std z+1, DEZ2
st z, DEZ1
subi STELLENL,
-3 ; STELLEN := STELLEN + 3
sbci STELLENH,
high(-3) ; to do a 16 bit addition
ELSEIF
DEZ2 <> #0
std z+1, DEZ2
st z, DEZ1
subi STELLENL,
-2 ; STELLEN := STELLEN + 2
sbci STELLENH,
high(-2) ; to do a 16 bit addition
ELSEIF
DEZ1 <> #0
st z, DEZ1
add STELLENL, true ;
STELLEN := STELLEN + 1
adc STELLENH,
ZERO
ELSE
; anything forgotten?
ENDI
adiw XH:XL,1 ; FACT := FACT+1
sbiw YH:YL,1 ; decrementing REPEAT loop counter (originally FOR)
UNTIL Z
ret
;=======================================================================
Bin16Dez:
; Convert 16 bit binary NUM1..NUM0
to 4 decimal digits DEZ3..DEZ0 and store DEZx in FArray,
; CARRY is
equivalent to DEZ4 (if required), also update STELLENH:STELLENL
and CARRYH:CARRYL
; algorithm according to
http://homepage.divms.uiowa.edu/~jones/bcd/decimal.html:
;
n
; |_ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _|
; |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|
; | n
| n | n |
n |
; 3 2
1 0
; The value of the number can be expressed in terms of
these 4 fields as follows:
;
; n = 4096 * n3 + 256 * n2 + 16 * n1 + 1 * n0
;
; In the same way, if the value of n, expressed in
decimal, is d4_d3_d2_d1_d0,
; where each of d4, d3, d2, d1
and d0 are decimal digits, the value of n can be expressed as:
;
; n = 10000 * d4 + 1000 * d3 + 100 * d2 + 10 * d1 + 1 *
d0
; Our problem then, is to compute the di from the ni
without dealing directly with the
; larger number n.
; To do this, we first note that the factors used in combining
the values of ni can
themselves
; be
expressed as sums of multiples of powers of 10.
; Therefore, we can rewrite the original expression as:
; n =
; n3 * (4*1000 +
0*100 + 9*10 + 6*1) +
; n2 * (2*100 +
5*10 + 6*1) +
; n1 * (1*10 +
6*1) +
; n0 * (1*1)
;
; If distribute the ni over the expressions for each factor, then factor
out the multiples
; of 100, we get the following:
; n =
; 1000 * (4*n3)
+
; 100 * (0*n3 +
2*n2) +
; 10 * (9*n3 +
5*n2 + 1*n1) +
; 1 * (6*n3 +
6*n2 + 6*n1 + 1*n0)
; We can use this to arrive at first approximations ai for d3 through d0:
; a3 = 4 * n3
; a2 = 0 * n3 + 2 * n2
; a1 = 9 * n3 + 5 * n2 + 1 * n1
; a0 = 6 * n3 + 6 * n2 + 6 * n1 + 1 * n0
; The values of ai are not
proper decimal digits because they are not properly bounded
; in the range 0 ≤ ai ≤
9.
; Instead, given that each of ni is bounded in the range 0 ≤ ni ≤ 15, the ai are bounded
; as follows:
; 0 <= a3 <= 60
; 0 <= a2 <= 30
; 0 <= a1 <= 225
; 0 <= a0 <= 285
Overflow possible!
; C code:
;
void putdec( uint16_t n ) {
; uint8_t d4,
d3, d2, d1, d0, q;
; d0 = n & 0xF;
; d1 =
(n>>4) &
0xF;
; d2 =
(n>>8) &
0xF;
; d3 =
(n>>12) & 0xF;
; if ( (6*(d3 + d2 + d1) + d0) > 255 ) { // overflow, only
if n >= 49146 (own tests)
; d0 =
(6*(d3 + d2 + d1) + d0) & 0xFF;
; q =
d0/10 + 25;
; d0 = d0
% 10 + 6;
; if (d0 >= 10) {
; d0 =
d0 - 10;
; q = q + 1;
; }
; } else
{ // no
overflow
; d0 =
6*(d3 + d2 + d1) + d0;
; q = d0 /
10;
; d0 = d0
% 10;
; }
; prepare DEZ0 (d0) ... DEZ3 (d3), q is the carry from
one digit to the next
ldi AKKU, 0xf
mov DEZ0, NUM0
and DEZ0, AKKU
mov DEZ1, NUM0
swap DEZ1 ; n >> 4
and DEZ1, AKKU
mov DEZ2, NUM1 ;
n >> 8
and DEZ2, AKKU
mov DEZ3, NUM1
swap DEZ3 ; n >> 12
and DEZ3, AKKU
/*
d0 = 6*(d3
+ d2 + d1) + d0;
q = d0 /
10;
d0 = d0 %
10;
*/
; if ( (6*(d3 + d2 + d1) + d0) > 255 ) { // overflow, only
if n >= 49146
; d0 =
(6*(d3 + d2 + d1) + d0) & 0xFF;
; q =
d0/10 + 25;
; d0 = d0
% 10 + 6;
; if (d0 >= 10) {
; d0 =
d0 - 10;
; q = q + 1;
; }
; } else
{ // no
overflow
; d0 =
6*(d3 + d2 + d1) + d0;
; q = d0 /
10;
; d0 = d0
% 10;
; }
; calculate ((6*(d2 + d1) + d0) + 6*d3) to make sure
that the carry (if any)
; happens
during the last add instruction:
mov AKKU2, DEZ1
add AKKU2, DEZ2 ;
d1 + d2
ldi AKKU3, 6 ; constant 6, until used by CARRY
mul AKKU3,
AKKU2 ; *6 (d1+d2), LSB of result is <= 180
mov AKKU, PLO
add AKKU, DEZ0 ;
now we got AKKU = 6*(d2 + d1) + d0) <= 195
mul AKKU3,
DEZ3 ; 6* d3, result is <= 90
add AKKU, PLO ; can overflow, but only if n >= 49146 (own tests)
IF C
MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,
; number DIV 10 in CARRY
ldi AKKU2, 25
add CARRY, AKKU2 ;
CARRY := CARRY + 25
add AKKU, AKKU3 ;
DEZ0 := DEZ0 + 6
IF
AKKU >= #10
subi AKKU, 10 ;
DEZ0 := DEZ0 - 10 (also: sub AKKU, DTMP)
inc CARRY ; CARRY := CARRY + 1
ENDI
ELSE
MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,
; number DIV 10 in CARRY
ENDI
mov DEZ0, AKKU
mov AKKU3, CARRY ;
AKKU3 is no longer constant 6
/*
d1 = q +
9*d3 + 5*d2 + d1;
q = d1 /
10;
d1 = d1 %
10;
*/
ldi AKKU, 9
mul AKKU,
DEZ3 ; result <= 135
mov AKKU,
PLO ; 8 bit are sufficient
ldi AKKU2, 5
mul AKKU2, DEZ2
; result <= 75
add AKKU, PLO ; + 5* DEZ2, result <= 210
add AKKU, DEZ1 ;
<= 225
add AKKU, AKKU3 ;
previous carry
MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,
; number DIV 10 in CARRY
mov AKKU3, CARRY
mov DEZ1, AKKU
/*
d2 = q +
2*d2;
q = d2 /
10;
d2 = d2 %
10;
*/
mov AKKU, DEZ2
lsl AKKU ; *2
add AKKU, AKKU3 ;
previous carry
MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,
; number DIV 10 in CARRY
mov AKKU3, CARRY
mov DEZ2, AKKU
/*
d3 = q +
4*d3;
q = d3 /
10;
d3 = d3 %
10;
*/
mov AKKU, DEZ3
lsl AKKU
lsl AKKU ; *4
add AKKU, AKKU3 ;
previous carry
MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,
; number DIV 10 in CARRY
mov DEZ3, AKKU
;mov DEZ4,
CARRY ; if n > 9999 (DEZ4 required
from factorial 960 or use CARRY instead)
ret