; ---- Test harness (test purposes only) ----
; R27:R26 is a 16-bit test counter that starts at 0. Each pass through
; loop67 copies it into R17:R16, the input pair of the code that follows.
clr r26 ; counter low byte = 0
clr r27 ; counter high byte = 0
loop67: ; top of the test loop (test purposes only)
movw r16,r26 ; R17:R16 = R27:R26 (current test value); place for breakpoint START
;============= The Algorithm Starts Here ==========================
; Purpose:  convert the 16-bit binary value in R17:R16 into five decimal
;           digits, one digit per register.
; INPUT:    R17:R16 = value, 0-65535 (R17 = high byte, R16 = low byte)
; OUTPUT:   R18 = ten-thousands, R17 = thousands, R21 = hundreds,
;           R3  = tens,          R16 = units
; CLOBBERS: R0, R1 (MUL results), R2, R19, R20 and the status flags.
;           R18 is used as the temporary SIX before it receives a digit.
; General idea: split the value into value/1000 and value mod 1000 (0-999),
; done as a divide by 4 followed by a divide by 250.
; The "clk" figures in the comments below are the author's own cycle counts.
;
; I. Divide by 4: shift R17:R16 right twice. The two bits that fall out are
;    caught in the top of R3 (input bit 1 ends in R3 bit 7, input bit 0 in
;    R3 bit 6). The lower six bits of R3 hold stale data that is never read.
lsr r17
ror r16
ror r3 ; catch input bit 0 (author's note: "was r3")
lsr r17
ror r16
ror r3 ; catch input bit 1
; R17:R16 = value/4, remainder bits parked in R3 ; 6 clk
;
; II. Divide by 250.
; R17:R16 now holds value/4, at most $3FFF for a 16-bit input.
; Read as a base-256 number, R17 is the quotient Q and R16 the remainder R
; of a division by 256. The code below turns them, in place, into the
; quotient and remainder of a division by 250 (so that R17 ends up as
; value/1000). It relies on
;     Q*256 + R = Q*(250+6) + R = Q*250 + (6*Q + R)
; so 6*Q + R has to be redistributed: every full 250 contained in it adds 1
; to the quotient. Because the sum is built in 8-bit registers, each wrap
; past 256 is handled as "one more 250 in the quotient, plus 6 left over"
; (256 = 250 + 6).
.DEF SIX=R18
ldi SIX,6 ; SIX = 6 = 256 - 250
mul r17,SIX ; R1:R0 = 6*Q
add R17,r1 ; R1 is 0 or 1 here (6*Q <= 6*63 = 378); this add cannot carry
sbrc r1,0 ; skip the next ADD when bit 0 of R1 is clear
add r0,SIX ; R1 was 1: the quotient was incremented, so add the leftover 6 ; 12 clk up to here
;------
add r16,r0 ; R16 = R + low byte of the 6*Q term
brcc PC+4 ; no carry: continue at the CPI (4 words ahead of this BRCC)
inc r17 ; carry: the sum passed 256, one more 250 goes to the quotient ...
add r16,SIX ; ... and 6 is left over for the remainder
brcs shortc ; shortcut (non-structured): that +6 wrapped too, correct once more
cpi r16,250 ; the base-250 remainder must end up below 250
brlo PC+3 ; already below 250: skip the two-instruction correction
shortc: add r16,SIX ; move one more 250 to the quotient: +6 modulo 256 fixes the remainder
inc r17 ; count that 250 in the quotient
; NOTE: the PC+4 / PC+3 offsets above count instruction words. Adding or
; removing an instruction in this section requires updating them.
;------ this section takes min 6 and max 9 clk
; depending on INPUT. And 18 to 21 clk from start.
;.UNDEF SIX ; left commented out by the author; SIX stays defined as R18
;
; Now R17 = value/1000 (0-65) and R16 = (value/4) mod 250 (0-249).
; The hundreds digit is extracted first. It equals R16/25 and does not
; depend on the two low bits parked in R3.
ldi r20,164 ; 164/4096 approximates 1/25; author found "164" with Excel, as suggested by:
mul r20,r16 ; https://www.avrfreaks.net/users/sparrow2-0
mov r21,r1 ; R21 = high byte of the product = R16*164/256
andi r21,$F0 ; only the high nibble (R16*164/4096) is the digit; low nibble is discarded
swap r21 ; move the digit to the low nibble: R21 = HUNDREDS
ldi r19,25 ; one hundred counts as 25 here,
mul r19,r21 ; because of the initial division by 4: R0 = 25 * hundreds
sub r16,r0 ; R16 = what is left below one hundred, still divided by 4 (0-24) ; 28 to 31 clk from start
lsl r3 ; shift the two bits saved in R3 by the /4 step
rol r16 ; back into the remainder (input bit 1 first,
lsl r3 ; then input bit 0)
rol r16 ; R16 = value mod 100 (0-99) ; 32 to 35 clk from start
;
; Convert R16 (0-99) into a tens digit and a units digit,
; as suggested by https://www.avrfreaks.net/users/sparrow2-0
ldi r20,26 ; 26/256 approximates 1/10
mov r2,r16 ; working copy of the remainder
sbrc r16,6 ; bit 6 set means R16 >= 64 (author verified the method from 65 up)
dec r2 ; then multiply one less, so the approximation does not overshoot (69*26/256 would give 7)
mul r2,r20 ; R1 = tens digit
mov r3,r1 ; R3 = TENS
ldi r19,10 ; load value for mul with 10
mul r1,r19 ; R0 = 10 * tens
sub r16,r0 ; R16 = UNITS ; 43 to 46 clk from start
;
; Split the quotient R17 (value/1000, at most 65 because $FFFF/1000 = 65)
; into ten-thousands and thousands. The "one less" correction is not needed
; in this range (26/256 first misses at 69); the author's disabled lines
; are kept below for reference.
; mov r18,r20 ;make a copy to the remainder place
; sbrc r20,6 ;if more than 64! (it works for 65, too)
; dec r16 ; use one less - no need, also
mul r17,r20 ; R20 still holds 26: R1 = R17/10
mov r18,r1 ; R18 = TEN-THOUSANDS (overwrites SIX, which is no longer needed)
; ldi r19,10 ; load value for mul with 10; already there
mul r1,r19 ; R19 still holds 10: R0 = 10 * ten-thousands
sub r17,r0 ; R17 = THOUSANDS (the remainder) ; 49 to 52 clk from start
;============= The Algorithm Ends Here ============================
; Author's measurements over all 65536 inputs:
; 3660807 cycle counter with the routine in the loop
; 393215 cycle counter with the loop run empty
; 3267592 difference
; 49.86 difference/65536 - the author's first version under 50 cycles on average
; Uses 7 MULs; length is 45 words; min/max run time 49/52 cycles.
; Result digits, most significant first: R18:R17:R21:R3:R16
; (the author calls this order awkward and planned to tidy it up).
; Tested good by the author on all inputs 0 - 65535.
adiw R26,1 ; next test value in R27:R26; carry is set when it wraps from $FFFF to 0
brcs outofloop ; wrapped: every input has been tried (test purposes only)
rjmp loop67 ; otherwise run the routine on the next value
outofloop: ; end of the test run
nop ; Place for Breakpoint STOP
rjmp PC-1 ; jump back to the NOP: idle here forever