factorial:

;=======================================================================

; Register usage

;=======================================================================

.DEF PLO = r0 ; product low

.DEF PHI = r1 ; product high

.DEF BITP = r2 ; bit pattern 0b1111_1000 for MOD_DIV10

.DEF CARRY = r3 ; carry (binary), used for MUL, MOD_DIV10

.DEF C205 = r4 ; constant 205 for MOD_DIV10

.DEF C100 = r5 ; constant 100

.DEF ZERO = r6 ; constant 0

.DEF TRUE = r7 ; constant 1

; NUM0..NUM3 can be the binary result of PRODUKT or any other

; 24-bit number to be multiplied or to be converted to decimal

.DEF NUM0 = r8 ; number LSB

.DEF NUM1 = r9 ; number

.DEF NUM2 = r10 ; number MSB

.DEF LEADZ = r11 ; mark leading zeros (when printing decimal digits supressing leading zeros)

.DEF DEZ0 = r12 ; decimal digit 0

.DEF DEZ1 = r13 ; decimal digit 1

.DEF DEZ2 = r14 ; decimal digit 2

.DEF DEZ3 = r15 ; decimal digit 3

.DEF AKKU = r16 ; accumulator

.DEF AKKU2 = r17 ; dito

.DEF AKKU3 = r18 ; dito

.DEF DTMP = r19 ; delay timer, temporary constant 10 (within factorial)

.DEF STELLENL = r20 ; LSB of number of result digits (stellen)

.DEF STELLENH = r21 ; MSB of number of result digits

.DEF CARRYL = r22 ; LSB of CARRY

.DEF CARRYH = r23 ; MSB of CARRY

.DEF CNTL = r24 ; 16-bit counter L

.DEF CNTH = r25 ; 16-bit counter H

; registers 26..31 = XL/XH, YL/YH and ZL/ZH, used as pointers and counters

; XH:XL holds the current FACT value,

; YH:YL is the FACT loop counter,

; ZH:ZL is the pointer to FArray

;=======================================================================

; Subroutines

;=======================================================================

factorial: ; calculate factorial FACT! in the huge array FArray (using almost all available SRAM),

; which includes the result terminated by EOT (0xff)

ldi zh,HIGH(EndOfText) ; EOT location (start of AVR SRAM)

ldi zl,LOW(EndOfText)

ldi AKKU, $ff ; EOT marker

st z+, AKKU ; ZH:ZL now pointing to 1st entry of FArray

mov STELLENL, TRUE ; STELLEN := 1 (even if no entry, important for

; increasing FACT counter)

clr STELLENH

ldi DTMP, 10 ; temporary constant 10 (within function factorial)

ldi xl, 1 ; start calculation with FACT := 1

clr xh

st z, TRUE

ldi yh,HIGH(FACT-1) ; decrementing loop counter for factorial calculation

ldi yl,LOW(FACT-1)

; now calculate factorial 1 * 2 * ... FACT

; for x := 2 to FACT do ; this version calculates factorial incrementally

; (which is faster than decrementing)

adiw XH:XL,1 ; FACT := FACT + 1 (which is 2)

REPEAT ; the 16-bit FOR loop is handled by REPEAT-UNTIL using

; decrementing YH:YL, just FACT = XH:XL is incrementing

ldi zh,HIGH(FArray) ; Z register is an increasing pointer to FArray

ldi zl,LOW(FArray)

mov CNTL,STELLENL ; decrementing loop counter for 2nd FOR loop (STELLEN)

mov CNTH,STELLENH

clr CARRYL ; the 16-bit factorial CARRYH:CARRYL belongs to

; NUM2:NUM1:NUM0 (is different from the 8-bit MOD_DIV10 CARRY)

clr CARRYH

; Pascal code:

; carry := 0;

; for j := 1 to stellen do // Index starting from 1

; begin

; produkt := FakArray[j] * i + carry; // i = current FACT value

; FakArray[j] := produkt mod 10;

; carry := produkt div 10;

; end;

REPEAT ; multiply decimal digit from FArray[ZH:ZL] by XH:XL

; (current 16 bit FACT value), result in NUM2:NUM1:NUM0

clr NUM2

ld AKKU,z ; take the next SINGLE decimal digit from FArray[ZH:ZL]

mul xl,AKKU ; multiply by FACTL

movw NUM1:NUM0, PHI:PLO

mul xh,AKKU ; AKKU still is the same decimal digit,

; now multiplied by FACTH

add NUM1, PLO

adc NUM2, PHI ; binary result is not more than 20 bits

add NUM0, CARRYL

adc NUM1, CARRYH

adc NUM2, ZERO

rcall Bin16Dez ; instead of DIV10/MOD10, convert 16 bit binary NUM1..NUM0

; to 4 decimal digits DEZ3..DEZ0, no AKKUs are saved

st z+, DEZ0 ; store DEZ0 (= PRODUKT MOD 10) in FArray and increment

; ZH:ZL for next entry, new binary CARRYH:CARRYL

; will become 100*DEZ3 + 10*DEZ2 + DEZ1

mul C100, DEZ3 ; 100 * DEZ3

movw CARRYH:CARRYL,PHI:PLO

mul DTMP, DEZ2 ; 10 * DEZ2 (DTMP is temporary = 10)

add CARRYL, PLO

adc CARRYH, PHI

add CARRYL, DEZ1

adc CARRYH, ZERO ; now CARRYH:CARRYL is binary value of

; PRODUKT DIV 10 = DEZ3:DEZ2:DEZ1

sbiw CNTH:CNTL, 1 ; next FOR (STELLEN)

UNTIL Z

; handle the decimal carry and update STELLEN ...

; Pascal code:

; while carry <> 0 do

; begin

; stellen := stellen + 1; // erst +1, dann Übertrag speichern

; FakArray[stellen] := carry mod 10; // Übertrag in die nächste Stelle

; carry := carry div 10;

; end;

; now store decimal digits DEZ3:DEZ2:DEZ1 (equivalent to CARRYH:CARRY) as additional

; entries in FArray[ZH:ZL] and update STELLEN ... (this method is much faster than

; any binary MOD_DIV10 due to availability of decimal digits)

; for factorials above 960! register DEZ4 is required (or use register CARRY instead)

; ZH:ZL points to next free entry of FArray

IF DEZ3 <> #0 ; check decimal digits top down ...

std z+2, DEZ3

std z+1, DEZ2

st z, DEZ1

subi STELLENL, -3 ; STELLEN := STELLEN + 3

sbci STELLENH, high(-3) ; to do a 16 bit addition

ELSEIF DEZ2 <> #0

std z+1, DEZ2

st z, DEZ1

subi STELLENL, -2 ; STELLEN := STELLEN + 2

sbci STELLENH, high(-2) ; to do a 16 bit addition

ELSEIF DEZ1 <> #0

st z, DEZ1

add STELLENL, true ; STELLEN := STELLEN + 1

adc STELLENH, ZERO

ELSE

; anything forgotten?

ENDI

adiw XH:XL,1 ; FACT := FACT+1

sbiw YH:YL,1 ; decrementing REPEAT loop counter (originally FOR)

UNTIL Z

ret

;=======================================================================

Bin16Dez:

; Convert 16 bit binary NUM1..NUM0 to 4 decimal digits DEZ3..DEZ0 and store DEZx in FArray,

; CARRY is equivalent to DEZ4 (if required), also update STELLENH:STELLENL and CARRYH:CARRYL

; algorithm according to http://homepage.divms.uiowa.edu/~jones/bcd/decimal.html:

; n

; |_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|

; |_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|

; | n | n | n | n |

; 3 2 1 0

; The value of the number can be expressed in terms of these 4 fields as follows:

;

; n = 4096 * n3 + 256 * n2 + 16 * n1 + 1 * n0

;

; In the same way, if the value of n, expressed in decimal, is d4_d3_d2_d1_d0,

; where each of d4, d3, d2, d1 and d0 are decimal digits, the value of n can be expressed as:

;

; n = 10000 * d4 + 1000 * d3 + 100 * d2 + 10 * d1 + 1 * d0

; Our problem then, is to compute the di from the ni without dealing directly with the

; larger number n.

; To do this, we first note that the factors used in combining the values of ni can themselves

; be expressed as sums of multiples of powers of 10.

; Therefore, we can rewrite the original expression as:

; n =

; n3 * (4*1000 + 0*100 + 9*10 + 6*1) +

; n2 * (2*100 + 5*10 + 6*1) +

; n1 * (1*10 + 6*1) +

; n0 * (1*1)

;

; If distribute the ni over the expressions for each factor, then factor out the multiples

; of 100, we get the following:

; n =

; 1000 * (4*n3) +

; 100 * (0*n3 + 2*n2) +

; 10 * (9*n3 + 5*n2 + 1*n1) +

; 1 * (6*n3 + 6*n2 + 6*n1 + 1*n0)

; We can use this to arrive at first approximations ai for d3 through d0:

; a3 = 4 * n3

; a2 = 0 * n3 + 2 * n2

; a1 = 9 * n3 + 5 * n2 + 1 * n1

; a0 = 6 * n3 + 6 * n2 + 6 * n1 + 1 * n0

; The values of ai are not proper decimal digits because they are not properly bounded

; in the range 0 ≤ ai ≤ 9.

; Instead, given that each of ni is bounded in the range 0 ≤ ni ≤ 15, the ai are bounded

; as follows:

; 0 <= a3 <= 60

; 0 <= a2 <= 30

; 0 <= a1 <= 225

; 0 <= a0 <= 285 Overflow possible!

; C code:

; void putdec( uint16_t n ) {

; uint8_t d4, d3, d2, d1, d0, q;

; d0 = n & 0xF;

; d1 = (n>>4) & 0xF;

; d2 = (n>>8) & 0xF;

; d3 = (n>>12) & 0xF;

; if ( (6*(d3 + d2 + d1) + d0) > 255 ) { // overflow, only if n >= 49146 (own tests)

; d0 = (6*(d3 + d2 + d1) + d0) & 0xFF;

; q = d0/10 + 25;

; d0 = d0 % 10 + 6;

; if (d0 >= 10) {

; d0 = d0 - 10;

; q = q + 1;

; }

; } else { // no overflow

; d0 = 6*(d3 + d2 + d1) + d0;

; q = d0 / 10;

; d0 = d0 % 10;

; }

; prepare DEZ0 (d0) ... DEZ3 (d3), q is the carry from one digit to the next

ldi AKKU, 0xf

mov DEZ0, NUM0

and DEZ0, AKKU

mov DEZ1, NUM0

swap DEZ1 ; n >> 4

and DEZ1, AKKU

mov DEZ2, NUM1 ; n >> 8

and DEZ2, AKKU

mov DEZ3, NUM1

swap DEZ3 ; n >> 12

and DEZ3, AKKU

d0 = 6*(d3 + d2 + d1) + d0;

q = d0 / 10;

d0 = d0 % 10;

; if ( (6*(d3 + d2 + d1) + d0) > 255 ) { // overflow, only if n >= 49146

; d0 = (6*(d3 + d2 + d1) + d0) & 0xFF;

; q = d0/10 + 25;

; d0 = d0 % 10 + 6;

; if (d0 >= 10) {

; d0 = d0 - 10;

; q = q + 1;

; }

; } else { // no overflow

; d0 = 6*(d3 + d2 + d1) + d0;

; q = d0 / 10;

; d0 = d0 % 10;

; }

; calculate ((6*(d2 + d1) + d0) + 6*d3) to make sure that the carry (if any)

; happens during the last add instruction:

mov AKKU2, DEZ1

add AKKU2, DEZ2 ; d1 + d2

ldi AKKU3, 6 ; constant 6, until used by CARRY

mul AKKU3, AKKU2 ; *6 (d1+d2), LSB of result is <= 180

mov AKKU, PLO

add AKKU, DEZ0 ; now we got AKKU = 6*(d2 + d1) + d0) <= 195

mul AKKU3, DEZ3 ; 6* d3, result is <= 90

add AKKU, PLO ; can overflow, but only if n >= 49146 (own tests)

IF C

MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,

; number DIV 10 in CARRY

ldi AKKU2, 25

add CARRY, AKKU2 ; CARRY := CARRY + 25

add AKKU, AKKU3 ; DEZ0 := DEZ0 + 6

IF AKKU >= #10

subi AKKU, 10 ; DEZ0 := DEZ0 - 10 (also: sub AKKU, DTMP)

inc CARRY ; CARRY := CARRY + 1

ENDI

ELSE

MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,

; number DIV 10 in CARRY

ENDI

mov DEZ0, AKKU

mov AKKU3, CARRY ; AKKU3 is no longer constant 6

d1 = q + 9*d3 + 5*d2 + d1;

q = d1 / 10;

d1 = d1 % 10;

ldi AKKU, 9

mul AKKU, DEZ3 ; result <= 135

mov AKKU, PLO ; 8 bit are sufficient

ldi AKKU2, 5

mul AKKU2, DEZ2 ; result <= 75

add AKKU, PLO ; + 5* DEZ2, result <= 210

add AKKU, DEZ1 ; <= 225

add AKKU, AKKU3 ; previous carry

MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,

; number DIV 10 in CARRY

mov AKKU3, CARRY

mov DEZ1, AKKU

d2 = q + 2*d2;

q = d2 / 10;

d2 = d2 % 10;

mov AKKU, DEZ2

lsl AKKU ; *2

add AKKU, AKKU3 ; previous carry

MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,

; number DIV 10 in CARRY

mov AKKU3, CARRY

mov DEZ2, AKKU

d3 = q + 4*d3;

q = d3 / 10;

d3 = d3 % 10;

mov AKKU, DEZ3

lsl AKKU

lsl AKKU ; *4

add AKKU, AKKU3 ; previous carry

MOD_DIV10 ; input: number in AKKU, output: number MOD 10 in AKKU,

; number DIV 10 in CARRY

mov DEZ3, AKKU

;mov DEZ4, CARRY ; if n > 9999 (DEZ4 required from factorial 960 or use CARRY instead)

ret