I tried to implement unsigned 32 bit multiplication to be used on CH32V003 chip which doesn't have a hardware multiplier. I translated a code used for AVR to RISCV assembly (shift and add method) which outputs a 64bit answer. Most of the numbers i tried were multiplied correctly including max 0xffffffff times 0xffffffff. When i try 0xffffffff times 0x19 i get 0x12ffffffe7 but the correct answer should be 0x18ffffffe7. I tried multiple codes of different permutations and combinations provided by CHATGPT,GEMINI,DEEPSEEK etc but the faulty answer persists. All the AI chatbots gave up. I am a hobbyist and my knowledge in this subject is limited. I am simulating the code in RARS. The only code I was able to get correct answer was repeated addition of the multiplicand multiplier times with carry/overflow propagation. I doubt the shift and add method fails because of no proper way to detect overflow like carry bit. If any members can find a solution i'd be grateful.
my code translated from AVR`
.data
result_lo: .word 0
result_hi: .word 0
modulo: .word 0
.text
li a1,0xffffffff # multiplicant
li a2,0x19 # multiplier
li a3,0x00000000 # result_lo
li a4,0x00000000 # result_hi
li a5,0 # working register
start:
call ROR # rotate right multiplier to test lsb is 0 or 1
bnez x3,multiply # if lsb =1 branch to repeated adding of multiplicant to result register
finishmul:
call RLL2 # shift multiplicand left or multiply by 2
beqz a2,exit_proc
J start # repeat loop
exit_proc:
j exit_proc
#ret
multiply:
add a5,a3,a1 # add multiplicant to low result register and store final result in a5 for processing
sltu a0,a5,a3 # set a0 to 1 if result of addition a3:a1 i a5 is greater than a3
sltu x3,a5,a1 # set x3 to 1 if result of addition a3:a1 in a5 is greater than a1
or a0,a0,x3 # or a0 and x3 , if 1 carry if a0 = 0 no carry
bnez a0,carryset # if a0 = 1 carry set, branch to label carry set
mv a3,a5 # result in working register copied to a3 low result register
J finishmul # jump to label finishmul
carryset: # reach here only if carryset
mv a3,a5 # copy a5 to low result a3
addi a4,a4,1 # add carry to a4 high register result
J finishmul # jump to label finishmul
ROR:
li x3,0 # clear carry
mv t0,a2 # copy number in a2 to t0
andi t0,t0,1 # extract lsb is 0 or 1
beqz t0,zzz # if lab is 0 branch to zzz
li x3,1 # if lsb is 1 carry occured , load 1 in carry register x3
srli a2,a2,1 # shift right a2 by 1 postion
ret # return to caller
zzz: # reach here if lsb =0
li x3,0 # load x3 0 indicating carry bit is 0
srli a2,a2,1 # right shift multiplier once. divide multiplier by 2
ret # return to caller
ROL:
li x3,0 #
mv t0,a2
li x3,0x80000000
and t0,t0,x3
beqz t0,zzz1
li x3,1 # carry
slli a2,a2,1
ret
zzz1:
li x3,0
slli a2,a2,1
ret
RLL2: # rotate left 2 registers a3:a5
mv a5,a4 # copy contents of a4 to a5
li x3,0 # clear x3
mv t0,a1 # copy multiplicant to t0
li x3 ,0x80000000 # load x3 MSB bitmask
and t0,t0,x3 # and with 0x800000000 to extract the MSB
bnez t0,OR1 # if MSB = 1 branch to OR1 label
slli a1,a1,1 # shift left 1 position a1 register ( multiplicant)
slli a5,a5,1 # shift left 1 position working register with value of a4 register ( multiplicant)
beqz a2,exit # if multiplier register is 0 exit
mv a4,a5 # copy back the shifter multiplicant to a4
ret
OR1:
mv a5,a4
slli a1,a1,1
slli a5,a5,1
li x3,1
or a5,a5,x3
beqz a2,exit
mv a4,a5
ret
exit:
ret
i tried above code and all the answer for 0xffffffff multiplied by 0x19 is 0x12ffffffe7 in RARS. It should be 0x18ffffffe7 as per calculator. If i do repeated addition of 0xffffffff 0x19 times i get the correct answer 0x18ffffffe7.
I tried to implement unsigned 32 bit multiplication to be used on CH32V003 chip which doesn't have a hardware multiplier. I translated a code used for AVR to RISCV assembly (shift and add method) which outputs a 64bit answer. Most of the numbers i tried were multiplied correctly including max 0xffffffff times 0xffffffff. When i try 0xffffffff times 0x19 i get 0x12ffffffe7 but the correct answer should be 0x18ffffffe7. I tried multiple codes of different permutations and combinations provided by CHATGPT,GEMINI,DEEPSEEK etc but the faulty answer persists. All the AI chatbots gave up. I am a hobbyist and my knowledge in this subject is limited. I am simulating the code in RARS. The only code I was able to get correct answer was repeated addition of the multiplicand multiplier times with carry/overflow propagation. I doubt the shift and add method fails because of no proper way to detect overflow like carry bit. If any members can find a solution i'd be grateful.
my code translated from AVR`
.data
result_lo: .word 0
result_hi: .word 0
modulo: .word 0
.text
li a1,0xffffffff # multiplicant
li a2,0x19 # multiplier
li a3,0x00000000 # result_lo
li a4,0x00000000 # result_hi
li a5,0 # working register
start:
call ROR # rotate right multiplier to test lsb is 0 or 1
bnez x3,multiply # if lsb =1 branch to repeated adding of multiplicant to result register
finishmul:
call RLL2 # shift multiplicand left or multiply by 2
beqz a2,exit_proc
J start # repeat loop
exit_proc:
j exit_proc
#ret
multiply:
add a5,a3,a1 # add multiplicant to low result register and store final result in a5 for processing
sltu a0,a5,a3 # set a0 to 1 if result of addition a3:a1 i a5 is greater than a3
sltu x3,a5,a1 # set x3 to 1 if result of addition a3:a1 in a5 is greater than a1
or a0,a0,x3 # or a0 and x3 , if 1 carry if a0 = 0 no carry
bnez a0,carryset # if a0 = 1 carry set, branch to label carry set
mv a3,a5 # result in working register copied to a3 low result register
J finishmul # jump to label finishmul
carryset: # reach here only if carryset
mv a3,a5 # copy a5 to low result a3
addi a4,a4,1 # add carry to a4 high register result
J finishmul # jump to label finishmul
ROR:
li x3,0 # clear carry
mv t0,a2 # copy number in a2 to t0
andi t0,t0,1 # extract lsb is 0 or 1
beqz t0,zzz # if lab is 0 branch to zzz
li x3,1 # if lsb is 1 carry occured , load 1 in carry register x3
srli a2,a2,1 # shift right a2 by 1 postion
ret # return to caller
zzz: # reach here if lsb =0
li x3,0 # load x3 0 indicating carry bit is 0
srli a2,a2,1 # right shift multiplier once. divide multiplier by 2
ret # return to caller
ROL:
li x3,0 #
mv t0,a2
li x3,0x80000000
and t0,t0,x3
beqz t0,zzz1
li x3,1 # carry
slli a2,a2,1
ret
zzz1:
li x3,0
slli a2,a2,1
ret
RLL2: # rotate left 2 registers a3:a5
mv a5,a4 # copy contents of a4 to a5
li x3,0 # clear x3
mv t0,a1 # copy multiplicant to t0
li x3 ,0x80000000 # load x3 MSB bitmask
and t0,t0,x3 # and with 0x800000000 to extract the MSB
bnez t0,OR1 # if MSB = 1 branch to OR1 label
slli a1,a1,1 # shift left 1 position a1 register ( multiplicant)
slli a5,a5,1 # shift left 1 position working register with value of a4 register ( multiplicant)
beqz a2,exit # if multiplier register is 0 exit
mv a4,a5 # copy back the shifter multiplicant to a4
ret
OR1:
mv a5,a4
slli a1,a1,1
slli a5,a5,1
li x3,1
or a5,a5,x3
beqz a2,exit
mv a4,a5
ret
exit:
ret
i tried above code and all the answer for 0xffffffff multiplied by 0x19 is 0x12ffffffe7 in RARS. It should be 0x18ffffffe7 as per calculator. If i do repeated addition of 0xffffffff 0x19 times i get the correct answer 0x18ffffffe7.
I managed to write a new routine that implements 32x32=64 bit multiplication using the shift and add with double register method. If multiplier lsb is 1 add multiplicand to high 32bit register then shift right high result & low result register 1 bit. If multiplier lsb is 0 shift the result high & low result register right 1 bit. Also shift right 1 bit the multiplier. This above process repeated 32 times. When multiplicand is added to the high result if carry occurs 1 is ORed to the MSB of the carry register. During shifting carry register, result_hi register, result_lo is shifted as a block towards right by 1 bit each time. This is the final code and works for the previous value 0xffffffff X 0x19 = 0x18ffffffe7. Thanks to everybody for the support
.data
result_lo: .word 0
result_hi: .word 0
modulo: .word 0
.text
li a1,0xffffffff # multiplicand
li a2,0x19 # multiplier
li a3,0x00000000 # result_lo
li a4,0x00000000 # result_hi
li a5,0 # working register
li x5,32 # number of bits to be tested/counter
loop:
mv x3,a2 # copy multiplier to test lsb 1 or 0
andi x3,x3,1 # extract lsb in x3
bnez x3,addnshift1 # if x3 is 1 branch to add and shift
call shift # if x3 is 0 call routine to shift result hi and lo + carry register right
addi x5,x5,-1 # decrease counter
bnez x5,loop # if counter is not 0 go to label loop
slli t6,t6,1 # if counter is 0, shift carry register left 1 time ( i dont know why but corrects answer)
j exit # exit multiplication procedure
addnshift1:
call addnshift # call addnshift routine to add multiplicand to result_hi and shift both result_hi & result_lo
addi x5,x5,-1 # decrease counter
bnez x5,loop # if counter is more than 0 branch to label loop
slli t6,t6,1 # if counter is 0, shift carry register left 1 time ( i dont know why but corrects answer)
j exit # exit multiplication procedure
shift:
srli a2,a2,1 # multiplier right shift, 1 lsb lost
srli a3,a3,1 # 2n low register(a3) right shift and 0 in msb (a4:a3)
mv x4,a4 # a copy of high 2n register(a4) to x4 (a4:a3)
andi x4,x4,1 # copy lsb of a4 high 2n register
beqz x4,lsb0 # if lsb extracted is 0 , branch to lsb0 label
li x4,0x80000000 # if lsb of a4 was 1
or a3,a3,x4 # lsb of a4 now in msb of a3. (a4:a3 >> 1)
lsb0:
srli a4,a4,1 # 2n high register right shift ,same as 0 shifted between a4 to a3 >>
srli t6,t6,1 # shift right carry register together with a4:a3
ret # return to main program
addnshift:
add a4,a4,a1 # add multiplicand to high 2n register
sltu x8 a4,a1 # set x8 to 1 if result of addition (a4 + a1) answer_hi and multiplicand
bnez x8,setcarry # if x8 is not 0 , branch to setcarry label
return:
srli a2,a2,1 # multiplier right shift
srli a3,a3,1 # 2n low register right shift and 0 in msb
mv x4,a4 # a copy of lw 2n
andi x4,x4,1 # copy lsb of a4 high 2n register
beqz x4,addlsb0 # if lsb extracted is 0 , branch to addlsb0 label
li x4,0x80000000 # if lsb of a4 was 1
or a3,a3,x4 # lsb of a4 now in msb of a3. (a4:a3 >> 1)
addlsb0:
srli a4,a4,1 # 2n high register right shift
srli t6,t6,1 # shift right carry register together with a4:a3
ret # return to main program
setcarry:
li x7,0x80000000 # set msb of x7 with 0x80000000
or t6,t6,x7 # set msb of x7 by oring t6 with x7
j return # jump to shifting routine
exit:
beqz t6,nocarry # if t6 is not set , 0 , no overflow occurred, branch to nocarry
mv a4,t6 # if carry set , copy t6 to answer hi register
nocarry:
la a0,result_hi #
sw a4,0(a0) # save to data section
la a0,result_lo
sw a3,0(a0) # save to data section
end:
j end
–