| /* |
| * fuc microcode for nv98 pcrypt engine |
| * Copyright (C) 2010 Marcin Kościelnicki |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| .section #nv98_pcrypt_data |
| |
| // Engine state kept at the start of data space. The i0 handler's |
| // context-switch path transfers data space starting at address 0 (its own |
| // comment calls it a "128-byte context"), so everything in this block is |
| // presumably saved/restored per channel - confirm the size still covers |
| // ctx_iv if this layout is ever extended. |
| // |
| // ctx_dma labels the first of #dma_count consecutive DMA object words; |
| // the handler addresses them as ctx_dma + n * 4, so the three slots below |
| // must stay contiguous and in this order. |
| ctx_dma: |
| ctx_dma_query: .b32 0 |
| ctx_dma_src: .b32 0 |
| ctx_dma_dst: .b32 0 |
| .equ #dma_count 3 |
| // query address and counter: stored through common_cmd_dtable, read by |
| // cmd_query_get |
| ctx_query_address_high: .b32 0 |
| ctx_query_address_low: .b32 0 |
| ctx_query_counter: .b32 0 |
| // condition address: stored through common_cmd_dtable, read by |
| // cmd_cond_mode_queryful |
| ctx_cond_address_high: .b32 0 |
| ctx_cond_address_low: .b32 0 |
| // written by cmd_cond_mode; dtable_cmd ANDs it with an entry's flag |
| // halfword and skips the handler when the result is 1 |
| ctx_cond_off: .b32 0 |
| // src/dst addresses: stored through engine_cmd_dtable, read and advanced |
| // by crypt_cmd_length |
| ctx_src_address_high: .b32 0 |
| ctx_src_address_low: .b32 0 |
| ctx_dst_address_high: .b32 0 |
| ctx_dst_address_low: .b32 0 |
| // written by crypt_cmd_mode (values below 0xf only); index into |
| // crypt_dtable |
| ctx_mode: .b32 0 |
| // key and IV: four words each, stored through engine_cmd_dtable and |
| // transferred as whole blocks by crypt_cmd_length |
| .align 16 |
| ctx_key: .skip 16 |
| ctx_iv: .skip 16 |
| |
| // 32-byte scratch buffer used as the data-space side of transfers: |
| // cmd_query_get fills words 0x0..0xc before writing them out, |
| // cmd_cond_mode reads query records into swap and swap + 0x10, and the |
| // crypt_do_* loops bounce each block through it. |
| .align 0x80 |
| swap: |
| .skip 32 |
| |
| // Dispatch table for method indices 0x80 .. #common_cmd_max - 1, one |
| // 8-byte entry per index, consumed by dtable_cmd in the i0 handler: |
| //   first value, low halfword:  data address (store entries) or handler |
| //                               address (call entries) |
| //   first value, high halfword: 2 = store the parameter at that address; |
| //                               otherwise it is ANDed with ctx_cond_off |
| //                               and the handler is skipped when the |
| //                               result is 1 (0 = always call) |
| //   second value: parameter bits that must be clear, else |
| //                 invalid_bitfield (presumably "~" is bitwise NOT, so the |
| //                 operand lists the allowed bits - confirm with envyas) |
| // Entry order is method order; #common_cmd_max must equal 0x80 plus the |
| // number of entries. |
| .align 8 |
| common_cmd_dtable: |
| .b32 #ctx_query_address_high + 0x20000 ~0xff |
| .b32 #ctx_query_address_low + 0x20000 ~0xfffffff0 |
| .b32 #ctx_query_counter + 0x20000 ~0xffffffff |
| .b32 #cmd_query_get + 0x00000 ~1 |
| .b32 #ctx_cond_address_high + 0x20000 ~0xff |
| .b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0 |
| .b32 #cmd_cond_mode + 0x00000 ~7 |
| .b32 #cmd_wrcache_flush + 0x00000 ~0 |
| .equ #common_cmd_max 0x88 |
| |
| |
| // Dispatch table for method indices 0xc0 .. #engine_cmd_max - 1, read by |
| // dtable_cmd in the i0 handler. Each entry is 8 bytes: the first value |
| // carries a data or handler address in its low halfword and a flag in its |
| // high halfword (2 = store the parameter at the address; otherwise the |
| // flag is ANDed with ctx_cond_off and the handler is skipped when the |
| // result is 1); the second value is the mask of parameter bits that must |
| // be clear. |
| // Entries 0-3 fill ctx_key, 4-7 fill ctx_iv, 8-11 set the src/dst |
| // addresses, then MODE (always called) and LENGTH (flag 0x10000, so it is |
| // skipped while ctx_cond_off is 1). |
| // #engine_cmd_max must equal 0xc0 plus the number of entries. |
| .align 8 |
| engine_cmd_dtable: |
| .b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff |
| .b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff |
| .b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff |
| .b32 #ctx_key + 0xc + 0x20000 ~0xffffffff |
| .b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff |
| .b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff |
| .b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff |
| .b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff |
| .b32 #ctx_src_address_high + 0x20000 ~0xff |
| .b32 #ctx_src_address_low + 0x20000 ~0xfffffff0 |
| .b32 #ctx_dst_address_high + 0x20000 ~0xff |
| .b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0 |
| .b32 #crypt_cmd_mode + 0x00000 ~0xf |
| .b32 #crypt_cmd_length + 0x10000 ~0x0ffffff0 |
| .equ #engine_cmd_max 0xce |
| |
| // Per-mode routine table, one 4-byte entry per ctx_mode value: the first |
| // halfword is the prep routine, the second the transfer ("do") routine. |
| // crypt_cmd_length loads them from crypt_dtable + mode * 4 and + 2 and |
| // calls them in that order. crypt_cmd_mode rejects values >= 0xf, which |
| // matches the 15 entries here - keep the two in step. |
| .align 4 |
| crypt_dtable: |
| .b16 #crypt_copy_prep #crypt_do_inout |
| .b16 #crypt_store_prep #crypt_do_out |
| .b16 #crypt_ecb_e_prep #crypt_do_inout |
| .b16 #crypt_ecb_d_prep #crypt_do_inout |
| .b16 #crypt_cbc_e_prep #crypt_do_inout |
| .b16 #crypt_cbc_d_prep #crypt_do_inout |
| .b16 #crypt_pcbc_e_prep #crypt_do_inout |
| .b16 #crypt_pcbc_d_prep #crypt_do_inout |
| .b16 #crypt_cfb_e_prep #crypt_do_inout |
| .b16 #crypt_cfb_d_prep #crypt_do_inout |
| .b16 #crypt_ofb_prep #crypt_do_inout |
| .b16 #crypt_ctr_prep #crypt_do_inout |
| .b16 #crypt_cbc_mac_prep #crypt_do_in |
| .b16 #crypt_cmac_finish_complete_prep #crypt_do_in |
| .b16 #crypt_cmac_finish_partial_prep #crypt_do_in |
| |
| .align 0x100 |
| |
| .section #nv98_pcrypt_code |
| |
| // Entry code: one-time setup, then an idle loop. All real work happens |
| // in the i0 handler (ih). |
| |
| // $r0 is always set to 0 in our code - this allows some space savings. |
| // (the rest of the file relies on it for I[$r0 + x] / D[$r0 + x] |
| // addressing and as a zero operand) |
| clear b32 $r0 |
| |
| // set up the interrupt handler |
| mov $r1 #ih |
| mov $iv0 $r1 |
| |
| // init stack pointer |
| mov $sp $r0 |
| |
| // set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host |
| // (movw + sethi build the 32-bit value 0x0000fff0 in $r1) |
| movw $r1 0xfff0 |
| sethi $r1 0 |
| mov $r2 0x400 |
| iowr I[$r2 + 0x300] $r1 |
| |
| // enable the interrupts |
| // ($r1 still holds the dispatch value; bits 2 and 3 - the fifo and |
| // ctxswitch bits tested in ih - are ORed in before the write to I[0x400]) |
| or $r1 0xc |
| iowr I[$r2] $r1 |
| |
| // enable fifo access and context switching |
| mov $r1 3 |
| mov $r2 0x1200 |
| iowr I[$r2] $r1 |
| |
| // enable i0 delivery |
| bset $flags ie0 |
| |
| // sleep forever, waking only for interrupts. |
| // $p0 is set once here and never cleared anywhere in this file. |
| bset $flags $p0 |
| spin: |
| sleep $p0 |
| bra #spin |
| |
| // i0 handler |
| // Entered through $iv0 (installed by the init code). Services two sources, |
| // selected by bits of the status word read from I[0x200]: |
| //   bit 3 (0x8): context switch request |
| //   bit 2 (0x4): incoming fifo command |
| // and acks both before iret. |
| // Register use while a command is being handled: |
| //   $r1 = interrupt bits read on entry (kept for the final ack) |
| //   $r2 = command address word shifted left by 16; error codes are ORed |
| //         into its low bits before it is reported |
| //   $r3 = command parameter |
| //   $r4 = method index (low 11 bits of the address word) |
| ih: |
| // see which interrupts we got |
| iord $r1 I[$r0 + 0x200] |
| |
| and $r2 $r1 0x8 |
| cmpu b32 $r2 0 |
| bra e #noctx |
| |
| // context switch... prepare the regs for xfer |
| mov $r2 0x7700 |
| mov $xtargets $r2 |
| mov $xdbase $r0 |
| // 128-byte context. |
| // $r2 becomes the transfer descriptor: data address 0 in the low half, |
| // 0x5 in the high half (presumably the size code for 128 bytes). |
| mov $r2 0 |
| sethi $r2 0x50000 |
| |
| // read current channel |
| mov $r3 0x1400 |
| iord $r4 I[$r3] |
| // if bit 30 set, it's active, so we have to unload it first. |
| // (shift bit 30 into the sign position, then signed-compare with 0) |
| shl b32 $r5 $r4 1 |
| cmps b32 $r5 0 |
| bra nc #ctxload |
| |
| // unload the current channel - save the context |
| xdst $r0 $r2 |
| xdwait |
| // and clear bit 30, then write back |
| bclr $r4 0x1e |
| iowr I[$r3] $r4 |
| // tell PFIFO we unloaded |
| mov $r4 1 |
| iowr I[$r3 + 0x200] $r4 |
| |
| bra #noctx |
| |
| ctxload: |
| // no channel loaded - perhaps we're requested to load one |
| iord $r4 I[$r3 + 0x100] |
| shl b32 $r15 $r4 1 |
| cmps b32 $r15 0 |
| // if bit 30 of next channel not set, probably PFIFO is just |
| // killing a context. do a faux load, without the active bit. |
| bra nc #dummyload |
| |
| // ok, do a real context load. |
| xdld $r0 $r2 |
| xdwait |
| // re-program the DMA object registers from the freshly loaded context: |
| // for n = #dma_count - 1 down to 0, write ctx_dma[n] to I[(0x180 + n) << 8]. |
| // The loop ends when the decrement of 0 borrows. |
| mov $r5 #ctx_dma |
| mov $r6 #dma_count - 1 |
| ctxload_dma_loop: |
| ld b32 $r7 D[$r5 + $r6 * 4] |
| add b32 $r8 $r6 0x180 |
| shl b32 $r8 8 |
| iowr I[$r8] $r7 |
| sub b32 $r6 1 |
| bra nc #ctxload_dma_loop |
| |
| dummyload: |
| // tell PFIFO we're done |
| mov $r5 2 |
| iowr I[$r3 + 0x200] $r5 |
| |
| noctx: |
| and $r2 $r1 0x4 |
| cmpu b32 $r2 0 |
| bra e #nocmd |
| |
| // incoming fifo command. |
| // $r2 <- I[0x1a00] (address word), $r3 <- I[0x1900] (parameter) |
| mov $r3 0x1900 |
| iord $r2 I[$r3 + 0x100] |
| iord $r3 I[$r3] |
| // extract the method |
| and $r4 $r2 0x7ff |
| // shift the addr to proper position if we need to interrupt later |
| shl b32 $r2 0x10 |
| |
| // mthd 0 and 0x100 [NAME, NOP]: ignore |
| // (0x7bf masks out bit 6, so indices 0 and 0x40 both compare equal to 0) |
| and $r5 $r4 0x7bf |
| cmpu b32 $r5 0 |
| bra e #cmddone |
| |
| // pick a dispatch range, highest first. $r5 is the table base biased by |
| // the range's first index so that base + index * 8 lands on the entry; |
| // $r6 is the exclusive upper index bound checked in dtable_cmd. |
| mov $r5 #engine_cmd_dtable - 0xc0 * 8 |
| mov $r6 #engine_cmd_max |
| cmpu b32 $r4 0xc0 |
| bra nc #dtable_cmd |
| mov $r5 #common_cmd_dtable - 0x80 * 8 |
| mov $r6 #common_cmd_max |
| cmpu b32 $r4 0x80 |
| bra nc #dtable_cmd |
| cmpu b32 $r4 0x60 |
| bra nc #dma_cmd |
| cmpu b32 $r4 0x50 |
| bra ne #illegal_mthd |
| |
| // mthd 0x140: PM_TRIGGER |
| mov $r2 0x2200 |
| clear b32 $r3 |
| sethi $r3 0x20000 |
| iowr I[$r2] $r3 |
| bra #cmddone |
| |
| dma_cmd: |
| // mthd 0x180...: DMA_* |
| // Index 0x60 + n selects DMA slot n: the parameter, with bit 30 set, is |
| // stored to ctx_dma[n] and written to I[(0x180 + n) << 8]. |
| cmpu b32 $r4 0x60+#dma_count |
| bra nc #illegal_mthd |
| shl b32 $r5 $r4 2 |
| add b32 $r5 (#ctx_dma - 0x60 * 4) & 0xffff |
| bset $r3 0x1e |
| st b32 D[$r5] $r3 |
| add b32 $r4 0x180 - 0x60 |
| shl b32 $r4 8 |
| iowr I[$r4] $r3 |
| bra #cmddone |
| |
| dtable_cmd: |
| // table-driven method: bounds check, then locate the 8-byte entry |
| cmpu b32 $r4 $r6 |
| bra nc #illegal_mthd |
| shl b32 $r4 3 |
| add b32 $r4 $r5 |
| // second word of the entry = parameter bits that must be clear |
| ld b32 $r5 D[$r4 + 4] |
| and $r5 $r3 |
| cmpu b32 $r5 0 |
| bra ne #invalid_bitfield |
| // first word: $r5 = target address, $r6 = flag halfword |
| ld b16 $r5 D[$r4] |
| ld b16 $r6 D[$r4 + 2] |
| // flag 2: plain store of the parameter into the context |
| cmpu b32 $r6 2 |
| bra e #cmd_setctx |
| // otherwise skip the handler when (flag & ctx_cond_off) == 1 |
| ld b32 $r7 D[$r0 + #ctx_cond_off] |
| and $r6 $r7 |
| cmpu b32 $r6 1 |
| bra e #cmddone |
| // handlers report failure (or a requested interrupt) by returning with |
| // $p1 set and an error code ORed into $r2 |
| call $r5 |
| bra $p1 #dispatch_error |
| bra #cmddone |
| |
| cmd_setctx: |
| st b32 D[$r5] $r3 |
| bra #cmddone |
| |
| |
| invalid_bitfield: |
| or $r2 1 |
| dispatch_error: |
| illegal_mthd: |
| // report: $r2 (address << 16 | code) to I[0x1000], parameter to |
| // I[0x1100], then write 0x40 to I[0] and spin until bit 0x40 of the |
| // status word at I[0x200] reads back clear - presumably this raises an |
| // interrupt to the host and waits for it to be acked; confirm against |
| // the falcon docs. |
| mov $r4 0x1000 |
| iowr I[$r4] $r2 |
| iowr I[$r4 + 0x100] $r3 |
| mov $r4 0x40 |
| iowr I[$r0] $r4 |
| |
| im_loop: |
| iord $r4 I[$r0 + 0x200] |
| and $r4 0x40 |
| cmpu b32 $r4 0 |
| bra ne #im_loop |
| |
| cmddone: |
| // remove the command from FIFO |
| mov $r3 0x1d00 |
| mov $r4 1 |
| iowr I[$r3] $r4 |
| |
| nocmd: |
| // ack the processed interrupts |
| // (only the ctxswitch and fifo bits of the value read on entry) |
| and $r1 $r1 0xc |
| iowr I[$r0 + 0x100] $r1 |
| iret |
| |
| // Method handler (common_cmd_dtable): writes a 16-byte query record |
| //   { ctx_query_counter, 0, I[0xb00], I[0xc00] } |
| // through DMA target 0 to the configured query address. |
| // In:  $r3 = parameter (only bit 0 can be set, per the table mask) |
| //      $r2 = command address << 16 |
| // Out: $p1 = bit 0 of the parameter, $r2 |= 3; the dispatcher takes its |
| //      dispatch_error path when $p1 is set, which is how the requested |
| //      interrupt is raised. |
| // Clobbers $r4-$r7, $xtargets, $xdbase and the first 16 bytes of swap. |
| cmd_query_get: |
| // if bit 0 of param set, trigger interrupt afterwards. |
| setp $p1 $r3 |
| or $r2 3 |
| |
| // read PTIMER, beware of races... |
| // Read I[0xc00], then I[0xb00], then I[0xc00] again and retry until both |
| // reads of I[0xc00] agree (presumably I[0xc00] is the high word, so a |
| // carry out of the low word between the reads is detected). |
| mov $r4 0xb00 |
| ptimer_retry: |
| iord $r6 I[$r4 + 0x100] |
| iord $r5 I[$r4] |
| iord $r7 I[$r4 + 0x100] |
| cmpu b32 $r6 $r7 |
| bra ne #ptimer_retry |
| |
| // prepare the query structure |
| ld b32 $r4 D[$r0 + #ctx_query_counter] |
| st b32 D[$r0 + #swap + 0x0] $r4 |
| st b32 D[$r0 + #swap + 0x4] $r0 |
| st b32 D[$r0 + #swap + 0x8] $r5 |
| st b32 D[$r0 + #swap + 0xc] $r6 |
| |
| // will use target 0, DMA_QUERY. |
| mov $xtargets $r0 |
| |
| // NOTE(review): here $xdbase gets only address_high << 24 and the whole |
| // low word is passed as the xdst offset, whereas cmd_cond_mode_queryful |
| // folds address_low >> 8 into $xdbase and passes just the low 8 bits. |
| // Both are presumably equivalent if the hardware accepts a 32-bit |
| // offset - confirm. |
| ld b32 $r4 D[$r0 + #ctx_query_address_high] |
| shl b32 $r4 0x18 |
| mov $xdbase $r4 |
| |
| ld b32 $r4 D[$r0 + #ctx_query_address_low] |
| // descriptor: data address swap, 0x2 in the high half (the same value is |
| // used for every 16-byte transfer in this file) |
| mov $r5 #swap |
| sethi $r5 0x20000 |
| xdst $r4 $r5 |
| xdwait |
| |
| ret |
| |
| // Method handler (common_cmd_dtable): sets ctx_cond_off from the mode in |
| // $r3. dtable_cmd skips conditional handlers while ctx_cond_off is 1. |
| //   mode 0, 1: ctx_cond_off = mode ^ 1, no memory access |
| //   mode 2:    read one query record from the cond address; |
| //              ctx_cond_off = 1 if its second word is 0, else 0 |
| //   mode 3, 4: read two consecutive 16-byte records; "equal" means both |
| //              the first and the second words match; |
| //              ctx_cond_off = equal ^ (mode == 3) |
| //   mode >= 5: error - returns with $p1 set and $r2 |= 2 |
| // On success $p1 is cleared ($r2 still has the code ORed in, but the |
| // dispatcher only reports it when $p1 is set). |
| // Clobbers $r3-$r6, $xdbase, $xtargets and swap. |
| cmd_cond_mode: |
| // if >= 5, INVALID_ENUM |
| bset $flags $p1 |
| or $r2 2 |
| cmpu b32 $r3 5 |
| bra nc #return |
| |
| // otherwise, no error. |
| bclr $flags $p1 |
| |
| // if < 2, no QUERY object is involved |
| cmpu b32 $r3 2 |
| bra nc #cmd_cond_mode_queryful |
| |
| xor $r3 1 |
| st b32 D[$r0 + #ctx_cond_off] $r3 |
| return: |
| ret |
| |
| cmd_cond_mode_queryful: |
| // ok, will need to pull a QUERY object, prepare offsets |
| // $xdbase = (address_high << 24) | (address_low >> 8), |
| // $r6 = low 8 bits of the address, used as the transfer offset |
| ld b32 $r4 D[$r0 + #ctx_cond_address_high] |
| ld b32 $r5 D[$r0 + #ctx_cond_address_low] |
| and $r6 $r5 0xff |
| shr b32 $r5 8 |
| shl b32 $r4 0x18 |
| or $r4 $r5 |
| mov $xdbase $r4 |
| mov $xtargets $r0 |
| |
| // pull the first one |
| mov $r5 #swap |
| sethi $r5 0x20000 |
| xdld $r6 $r5 |
| |
| // if == 2, only a single QUERY is involved... |
| cmpu b32 $r3 2 |
| bra ne #cmd_cond_mode_double |
| |
| xdwait |
| // ctx_cond_off = z flag of (second word == 0) |
| ld b32 $r4 D[$r0 + #swap + 4] |
| cmpu b32 $r4 0 |
| xbit $r4 $flags z |
| st b32 D[$r0 + #ctx_cond_off] $r4 |
| ret |
| |
| // ok, we'll need to pull second one too |
| cmd_cond_mode_double: |
| // second record: 0x10 further in memory, landing at swap + 0x10; the |
| // single xdwait below is issued after both loads have been queued |
| add b32 $r6 0x10 |
| add b32 $r5 0x10 |
| xdld $r6 $r5 |
| xdwait |
| |
| // compare COUNTERs |
| ld b32 $r5 D[$r0 + #swap + 0x00] |
| ld b32 $r6 D[$r0 + #swap + 0x10] |
| cmpu b32 $r5 $r6 |
| xbit $r4 $flags z |
| |
| // compare RESen |
| ld b32 $r5 D[$r0 + #swap + 0x04] |
| ld b32 $r6 D[$r0 + #swap + 0x14] |
| cmpu b32 $r5 $r6 |
| xbit $r5 $flags z |
| and $r4 $r5 |
| |
| // and negate or not, depending on mode |
| // ($r5 = 1 for mode 3, 0 for mode 4) |
| cmpu b32 $r3 3 |
| xbit $r5 $flags z |
| xor $r4 $r5 |
| st b32 D[$r0 + #ctx_cond_off] $r4 |
| ret |
| |
| // Method handler (common_cmd_dtable; the table mask requires a zero |
| // parameter). Writes 0x00010000 to I[0x2200] - the same register the |
| // PM_TRIGGER path in ih writes 0x00020000 to - and returns with $p1 |
| // cleared so the dispatcher reports no error. |
| // Clobbers $r2 and $r3. |
| cmd_wrcache_flush: |
| bclr $flags $p1 |
| mov $r2 0x2200 |
| clear b32 $r3 |
| sethi $r3 0x10000 |
| iowr I[$r2] $r3 |
| ret |
| |
| // Method handler (engine_cmd_dtable): validates the mode in $r3 and |
| // stores it in ctx_mode, where crypt_cmd_length uses it to index |
| // crypt_dtable. |
| // Out: mode < 0xf  -> ctx_mode updated, $p1 cleared |
| //      mode >= 0xf -> ctx_mode untouched, $p1 set, error code 2 in $r2 |
| // ($r2 gets the code ORed in on both paths; the dispatcher only reports |
| // it when $p1 is set.) |
| // crypt_cmd_mode_return is also the zero-length exit of crypt_cmd_length. |
| crypt_cmd_mode: |
| // if >= 0xf, INVALID_ENUM |
| bset $flags $p1 |
| or $r2 2 |
| cmpu b32 $r3 0xf |
| bra nc #crypt_cmd_mode_return |
| |
| bclr $flags $p1 |
| st b32 D[$r0 + #ctx_mode] $r3 |
| |
| crypt_cmd_mode_return: |
| ret |
| |
| // LENGTH method handler (engine_cmd_dtable entry with flag 0x10000, so |
| // dtable_cmd skips it while ctx_cond_off is 1). Runs the transform |
| // selected by ctx_mode over $r3 bytes, then writes the advanced src/dst |
| // addresses and the IV back into the context. |
| // In:  $r3 = byte count; the table mask only admits multiples of 16 |
| //      below 0x10000000 |
| // Out: $p1 cleared - this handler never reports an error |
| // Clobbers $r3-$r9, $xtargets, $xdbase (via the do routine) and swap. |
| crypt_cmd_length: |
| // The dispatcher branches to dispatch_error on $p1 after every handler |
| // call, and $p1 otherwise keeps whatever the previously dispatched |
| // handler left in it (e.g. cmd_query_get with bit 0 set). Clear it up |
| // front so neither exit path below raises a spurious error. |
| bclr $flags $p1 |
| |
| // nop if length == 0 |
| // (also required for correctness: the do loops only stop on equality) |
| cmpu b32 $r3 0 |
| bra e #crypt_cmd_mode_return |
| |
| // init key, IV |
| // Presumably cxset 3 routes the next transfers to the crypto unit and |
| // the 0x7 / 0x6 in the descriptor's high half select $c7 / $c6 as the |
| // destination - confirm against the falcon crypto docs. |
| cxset 3 |
| mov $r4 #ctx_key |
| sethi $r4 0x70000 |
| xdst $r0 $r4 |
| mov $r4 #ctx_iv |
| sethi $r4 0x60000 |
| xdst $r0 $r4 |
| xdwait |
| ckeyreg $c7 |
| |
| // prepare the targets |
| mov $r4 0x2100 |
| mov $xtargets $r4 |
| |
| // prepare src address |
| // $r4 = (high << 24) | (low >> 8) for $xdbase, $r5 = low 8 bits = offset |
| ld b32 $r4 D[$r0 + #ctx_src_address_high] |
| ld b32 $r5 D[$r0 + #ctx_src_address_low] |
| shr b32 $r8 $r5 8 |
| shl b32 $r4 0x18 |
| or $r4 $r8 |
| and $r5 $r5 0xff |
| |
| // prepare dst address |
| // same split: $r6 = base, $r7 = offset |
| ld b32 $r6 D[$r0 + #ctx_dst_address_high] |
| ld b32 $r7 D[$r0 + #ctx_dst_address_low] |
| shr b32 $r8 $r7 8 |
| shl b32 $r6 0x18 |
| or $r6 $r8 |
| and $r7 $r7 0xff |
| |
| // find the proper prep & do functions |
| // (crypt_dtable entries are 4 bytes: prep at +0, do at +2) |
| ld b32 $r8 D[$r0 + #ctx_mode] |
| shl b32 $r8 2 |
| |
| // run prep |
| ld b16 $r9 D[$r8 + #crypt_dtable] |
| call $r9 |
| |
| // do it |
| ld b16 $r9 D[$r8 + #crypt_dtable + 2] |
| call $r9 |
| // drain whatever is still in flight in each transfer routing the do |
| // routines used before touching the context again |
| cxset 1 |
| xdwait |
| cxset 0x61 |
| xdwait |
| xdwait |
| |
| // update src address |
| // rebuild the 40-bit address from base and the advanced offset; the |
| // offset may have grown past 8 bits, so carry into the high word |
| shr b32 $r8 $r4 0x18 |
| shl b32 $r9 $r4 8 |
| add b32 $r9 $r5 |
| adc b32 $r8 0 |
| st b32 D[$r0 + #ctx_src_address_high] $r8 |
| st b32 D[$r0 + #ctx_src_address_low] $r9 |
| |
| // update dst address |
| shr b32 $r8 $r6 0x18 |
| shl b32 $r9 $r6 8 |
| add b32 $r9 $r7 |
| adc b32 $r8 0 |
| st b32 D[$r0 + #ctx_dst_address_high] $r8 |
| st b32 D[$r0 + #ctx_dst_address_low] $r9 |
| |
| // pull updated IV |
| // (mirror of the IV push above, with xdld instead of xdst, so chained |
| // modes can continue across LENGTH commands) |
| cxset 2 |
| mov $r4 #ctx_iv |
| sethi $r4 0x60000 |
| xdld $r0 $r4 |
| xdwait |
| |
| ret |
| |
| |
| // Prep routine for ctx_mode 0 (crypt_dtable slot 0, paired with |
| // crypt_do_inout). The cs0begin count equals the number of crypto |
| // instructions that follow; the do loop issues cs0exec 1 once per block. |
| // Presumably: take a block from the input stream into $c0 and emit it |
| // unchanged - confirm mnemonic semantics against the falcon crypto docs. |
| crypt_copy_prep: |
| cs0begin 2 |
| cxsin $c0 |
| cxsout $c0 |
| ret |
| |
| // Prep routine for ctx_mode 1 (paired with crypt_do_out, which reads no |
| // source data). The one-instruction script emits $c6 for every block; |
| // crypt_cmd_length sends ctx_iv with 0x6 in the descriptor's high half, |
| // so $c6 presumably holds the IV - confirm. |
| crypt_store_prep: |
| cs0begin 1 |
| cxsout $c6 |
| ret |
| |
| // Prep routine for ctx_mode 2 (paired with crypt_do_inout). Three-step |
| // per-block script: input -> $c0, cenc in place, $c0 -> output. |
| // Presumably ECB encryption with the key selected by ckeyreg in |
| // crypt_cmd_length - confirm mnemonic semantics. |
| crypt_ecb_e_prep: |
| cs0begin 3 |
| cxsin $c0 |
| cenc $c0 $c0 |
| cxsout $c0 |
| ret |
| |
| // Prep routine for ctx_mode 3 (paired with crypt_do_inout). |
| // ckexp runs once, before the script is recorded; presumably it turns |
| // the key in $c7 into the form cdec needs - confirm. The three-step |
| // per-block script is input -> $c0, cdec in place, $c0 -> output. |
| crypt_ecb_d_prep: |
| ckexp $c7 $c7 |
| cs0begin 3 |
| cxsin $c0 |
| cdec $c0 $c0 |
| cxsout $c0 |
| ret |
| |
| // Prep routine for ctx_mode 4 (paired with crypt_do_inout). Four-step |
| // per-block script: input -> $c0, $c6 ^= $c0, cenc $c6 in place, emit |
| // $c6. $c6 carries over between blocks, so this presumably implements |
| // CBC encryption with $c6 as the chaining value (initially the IV) - |
| // confirm mnemonic semantics. |
| crypt_cbc_e_prep: |
| cs0begin 4 |
| cxsin $c0 |
| cxor $c6 $c0 |
| cenc $c6 $c6 |
| cxsout $c6 |
| ret |
| |
| // Prep routine for ctx_mode 5 (paired with crypt_do_inout). |
| // ckexp runs once up front (presumably preparing the key for cdec). |
| // Five-step per-block script: save the old $c6 in $c2, read the input |
| // straight into $c6, cdec it into $c0, xor with the saved value, emit. |
| // Presumably CBC decryption, with the ciphertext block left in $c6 as |
| // the next chaining value - confirm. |
| crypt_cbc_d_prep: |
| ckexp $c7 $c7 |
| cs0begin 5 |
| cmov $c2 $c6 |
| cxsin $c6 |
| cdec $c0 $c6 |
| cxor $c0 $c2 |
| cxsout $c0 |
| ret |
| |
| // Prep routine for ctx_mode 6 (paired with crypt_do_inout). Same first |
| // four steps as crypt_cbc_e_prep, plus a final $c6 ^= $c0 after the |
| // output, so the value carried to the next block mixes the emitted block |
| // with the input block. Presumably PCBC encryption - confirm. |
| crypt_pcbc_e_prep: |
| cs0begin 5 |
| cxsin $c0 |
| cxor $c6 $c0 |
| cenc $c6 $c6 |
| cxsout $c6 |
| cxor $c6 $c0 |
| ret |
| |
| // Prep routine for ctx_mode 7 (paired with crypt_do_inout). |
| // ckexp runs once up front (presumably preparing the key for cdec). |
| // Five-step per-block script: input -> $c0, cdec into $c1, $c6 ^= $c1, |
| // emit $c6, then $c6 ^= $c0 to form the value carried to the next block. |
| // Presumably PCBC decryption - confirm. |
| crypt_pcbc_d_prep: |
| ckexp $c7 $c7 |
| cs0begin 5 |
| cxsin $c0 |
| cdec $c1 $c0 |
| cxor $c6 $c1 |
| cxsout $c6 |
| cxor $c6 $c0 |
| ret |
| |
| // Prep routine for ctx_mode 8 (paired with crypt_do_inout). Four-step |
| // per-block script: cenc $c6 in place, input -> $c0, $c6 ^= $c0, emit |
| // $c6. The emitted block stays in $c6 for the next iteration; presumably |
| // CFB encryption - confirm. |
| crypt_cfb_e_prep: |
| cs0begin 4 |
| cenc $c6 $c6 |
| cxsin $c0 |
| cxor $c6 $c0 |
| cxsout $c6 |
| ret |
| |
| // Prep routine for ctx_mode 9 (paired with crypt_do_inout). Four-step |
| // per-block script: cenc $c6 into $c0, read the input into $c6, |
| // $c0 ^= $c6, emit $c0. Note it uses cenc, not cdec, and needs no ckexp; |
| // the input block stays in $c6 for the next iteration. Presumably CFB |
| // decryption - confirm. |
| crypt_cfb_d_prep: |
| cs0begin 4 |
| cenc $c0 $c6 |
| cxsin $c6 |
| cxor $c0 $c6 |
| cxsout $c0 |
| ret |
| |
| // Prep routine for ctx_mode 0xa (paired with crypt_do_inout). Four-step |
| // per-block script: cenc $c6 in place, input -> $c0, $c0 ^= $c6, emit |
| // $c0. $c6 is only ever re-encrypted, never mixed with data; presumably |
| // OFB, which is the same operation in both directions - confirm. |
| crypt_ofb_prep: |
| cs0begin 4 |
| cenc $c6 $c6 |
| cxsin $c0 |
| cxor $c0 $c6 |
| cxsout $c0 |
| ret |
| |
| // Prep routine for ctx_mode 0xb (paired with crypt_do_inout). Five-step |
| // per-block script: cenc $c6 into $c1, cadd $c6 1, input -> $c0, |
| // $c0 ^= $c1, emit $c0. Presumably CTR mode with $c6 as the counter |
| // block - confirm (including which end of the block cadd increments). |
| crypt_ctr_prep: |
| cs0begin 5 |
| cenc $c1 $c6 |
| cadd $c6 1 |
| cxsin $c0 |
| cxor $c0 $c1 |
| cxsout $c0 |
| ret |
| |
| // Prep routine for ctx_mode 0xc (paired with crypt_do_in, which reads |
| // source data but writes nothing to the destination). Three-step |
| // per-block script: input -> $c0, $c6 ^= $c0, cenc $c6 in place. There |
| // is no cxsout; the result accumulates in $c6 and reaches the context |
| // only through the IV read-back at the end of crypt_cmd_length. |
| // Presumably a CBC-MAC update - confirm. |
| crypt_cbc_mac_prep: |
| cs0begin 3 |
| cxsin $c0 |
| cxor $c6 $c0 |
| cenc $c6 $c6 |
| ret |
| |
| // Prep routine for ctx_mode 0xd (paired with crypt_do_in). Seven-step |
| // per-block script: input -> $c0, $c6 ^= $c0, then $c0 is xored with |
| // itself, passed through cenc and one cprecmac, xored into $c6, and $c6 |
| // is passed through cenc. |
| // Presumably the final CMAC step for a complete last block, with the |
| // subkey derived from the encryption of an all-zero block - confirm |
| // cprecmac semantics against the falcon crypto docs. |
| crypt_cmac_finish_complete_prep: |
| cs0begin 7 |
| cxsin $c0 |
| cxor $c6 $c0 |
| cxor $c0 $c0 |
| cenc $c0 $c0 |
| cprecmac $c0 $c0 |
| cxor $c6 $c0 |
| cenc $c6 $c6 |
| ret |
| |
| // Prep routine for ctx_mode 0xe (paired with crypt_do_in). Identical to |
| // crypt_cmac_finish_complete_prep except that cprecmac is applied twice, |
| // giving an eight-step script. |
| // Presumably the final CMAC step for a padded (partial) last block, |
| // using the second derived subkey - confirm cprecmac semantics, and |
| // whether the caller is expected to supply the padding. |
| crypt_cmac_finish_partial_prep: |
| cs0begin 8 |
| cxsin $c0 |
| cxor $c6 $c0 |
| cxor $c0 $c0 |
| cenc $c0 $c0 |
| cprecmac $c0 $c0 |
| cprecmac $c0 $c0 |
| cxor $c6 $c0 |
| cenc $c6 $c6 |
| ret |
| |
| // TODO |
| // Transfer routine for input-only modes (see crypt_dtable): feeds source |
| // blocks through the script recorded by the prep routine and writes |
| // nothing to the destination. |
| // In (set up by crypt_cmd_length): |
| //   $r3 = byte count (non-zero multiple of 0x10) |
| //   $r4 = source base for $xdbase, $r5 = source offset |
| // Out: $r5 advanced to the end offset, $r3 = that same end offset. |
| // Clobbers $r9, $xdbase and swap. |
| // The loop exits only when $r5 == $r3, so a zero count must never get |
| // here; crypt_cmd_length filters it out. |
| crypt_do_in: |
| add b32 $r3 $r5 |
| mov $xdbase $r4 |
| mov $r9 #swap |
| sethi $r9 0x20000 |
| crypt_do_in_loop: |
| // fetch one block from the source into swap |
| xdld $r5 $r9 |
| xdwait |
| // presumably cxset 0x22 redirects the following xdst so that swap is |
| // pushed into the crypto input stream rather than to memory - confirm |
| cxset 0x22 |
| xdst $r0 $r9 |
| // run the recorded script once for this block |
| cs0exec 1 |
| xdwait |
| add b32 $r5 0x10 |
| cmpu b32 $r5 $r3 |
| bra ne #crypt_do_in_loop |
| cxset 1 |
| xdwait |
| ret |
| |
| // Transfer routine for the output-only mode (crypt_store_prep in |
| // crypt_dtable): runs the recorded script once per block and writes the |
| // result to the destination; no source data is read. |
| // In (set up by crypt_cmd_length): |
| //   $r3 = byte count (non-zero multiple of 0x10) |
| //   $r6 = destination base for $xdbase, $r7 = destination offset |
| // Out: $r7 advanced to the end offset, $r3 = that same end offset. |
| // Clobbers $r9, $xdbase and swap. |
| // The loop exits only when $r7 == $r3, so a zero count must never get |
| // here; crypt_cmd_length filters it out. |
| crypt_do_out: |
| add b32 $r3 $r7 |
| mov $xdbase $r6 |
| mov $r9 #swap |
| sethi $r9 0x20000 |
| crypt_do_out_loop: |
| cs0exec 1 |
| // presumably cxset 0x61 makes the xdld below pull the script's output |
| // from the crypto stream into swap, and the xdst then writes swap to |
| // the destination at offset $r7 - confirm against the falcon docs |
| cxset 0x61 |
| xdld $r7 $r9 |
| xdst $r7 $r9 |
| cxset 1 |
| xdwait |
| add b32 $r7 0x10 |
| cmpu b32 $r7 $r3 |
| bra ne #crypt_do_out_loop |
| ret |
| |
| // Transfer routine for modes that read the source and write the |
| // destination (most crypt_dtable entries): per block, load from the |
| // source, run the recorded script once, store to the destination. |
| // In (set up by crypt_cmd_length): |
| //   $r3 = byte count (non-zero multiple of 0x10) |
| //   $r4 / $r5 = source base / offset |
| //   $r6 / $r7 = destination base / offset |
| // Out: $r5 and $r7 each advanced by the byte count; $r3 = source end |
| //      offset. |
| // Clobbers $r9, $xdbase and swap. |
| // Termination is checked on the source offset only, and only for |
| // equality, so a zero count must never get here; crypt_cmd_length |
| // filters it out. |
| crypt_do_inout: |
| add b32 $r3 $r5 |
| mov $r9 #swap |
| sethi $r9 0x20000 |
| crypt_do_inout_loop: |
| // $xdbase is switched between source and destination inside the loop, |
| // so it has to be reloaded on every iteration |
| mov $xdbase $r4 |
| xdld $r5 $r9 |
| xdwait |
| // presumably cxset 0x21 redirects the following xdst so that swap is |
| // pushed into the crypto input stream - confirm |
| cxset 0x21 |
| xdst $r0 $r9 |
| cs0exec 1 |
| // presumably cxset 0x61 makes the xdld below pull the script's output |
| // into swap; the xdst then writes it to the destination - confirm |
| cxset 0x61 |
| mov $xdbase $r6 |
| xdld $r7 $r9 |
| xdst $r7 $r9 |
| cxset 1 |
| xdwait |
| add b32 $r5 0x10 |
| add b32 $r7 0x10 |
| cmpu b32 $r5 $r3 |
| bra ne #crypt_do_inout_loop |
| ret |
| |
| .align 0x100 |