1 /*
   2  * Intel SHA Extensions optimized implementation of a SHA-256 update function
   3  *
   4  * This file is provided under a dual BSD/GPLv2 license.  When using or
   5  * redistributing this file, you may do so under either license.
   6  *
   7  * GPL LICENSE SUMMARY
   8  *
   9  * Copyright(c) 2015 Intel Corporation.
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of version 2 of the GNU General Public License as
  13  * published by the Free Software Foundation.
  14  *
  15  * This program is distributed in the hope that it will be useful, but
  16  * WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * General Public License for more details.
  19  *
  20  * Contact Information:
  21  *      Sean Gulley <sean.m.gulley@intel.com>
  22  *      Tim Chen <tim.c.chen@linux.intel.com>
  23  *
  24  * BSD LICENSE
  25  *
  26  * Copyright(c) 2015 Intel Corporation.
  27  * Copyright (c) 2018, Joyent, Inc.
  28  *
  29  * Redistribution and use in source and binary forms, with or without
  30  * modification, are permitted provided that the following conditions
  31  * are met:
  32  *
  33  *      * Redistributions of source code must retain the above copyright
  34  *        notice, this list of conditions and the following disclaimer.
  35  *      * Redistributions in binary form must reproduce the above copyright
  36  *        notice, this list of conditions and the following disclaimer in
  37  *        the documentation and/or other materials provided with the
  38  *        distribution.
  39  *      * Neither the name of Intel Corporation nor the names of its
  40  *        contributors may be used to endorse or promote products derived
  41  *        from this software without specific prior written permission.
  42  *
  43  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  44  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  45  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  46  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  47  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  48  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  49  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  50  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  51  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  52  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  53  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  54  *
  55  */
  56 
  57 /*
  58  * illumos uses this file under the terms of the BSD license.
  59  */
  60 
  61 #include <sys/asm_linkage.h>
  62 
  63 #define DIGEST_PTR      %rdi    /* 1st arg: context; hash words at +8 */
  64 #define DATA_PTR        %rsi    /* 2nd arg: input, advanced 64 bytes per block */
  65 #define NUM_BLKS        %rdx    /* 3rd arg: block count, later end-of-data ptr */
  66 
  67 #define SHA256CONSTANTS %rax    /* base address of the K256 table */
  68 
  69 #define MSG             %xmm0   /* W+K input; sha256rnds2 reads %xmm0 implicitly */
  70 #define STATE0          %xmm1   /* working state, ABEF at loop boundaries */
  71 #define STATE1          %xmm2   /* working state, CDGH at loop boundaries */
  72 #define MSGTMP0         %xmm3   /* MSGTMP0-3: rotating message schedule words */
  73 #define MSGTMP1         %xmm4
  74 #define MSGTMP2         %xmm5
  75 #define MSGTMP3         %xmm6
  76 #define MSGTMP4         %xmm7   /* scratch for shuffles and palignr results */
  77 
  78 #define SHUF_MASK       %xmm8   /* pshufb byte-flip mask */
  79 
  80 #define ABEF_SAVE       %xmm9   /* state copies taken at the top of each block */
  81 #define CDGH_SAVE       %xmm10
  82 
  83 /*
  84  * Intel SHA Extensions optimized implementation of a SHA-256 update function
  85  *
  86  * The function takes a pointer to the current hash values, a pointer to the
  87  * input data, and a number of 64 byte blocks to process.  Once all blocks have
  88  * been processed, the digest is updated in place with the resulting hash value.
  89  * The function only processes complete blocks, there is no functionality to
  90  * store partial blocks.  All message padding and hash value initialization must
  91  * be done outside the update function.
  92  *
  93  * The indented lines in the loop are instructions related to rounds processing.
  94  * The non-indented lines are instructions related to the message schedule.
  95  *
  96  * void SHA256TransformBlocks(SHA256_CTX *digest, const void *data,
  97  *     uint32_t numBlocks);
  98  * digest: pointer to the context; the eight hash words start 8 bytes in
  99  * data: pointer to input data
 100  * numBlocks: number of 64-byte blocks to process
 101  */
 102 
 103 .text
 104 .align 32
 105 ENTRY_NP(SHA256TransformBlocks)
 106         /* Leaf routine: makes no calls and sets up no stack frame. */
 107         shl             $6, NUM_BLKS            /* convert block count to bytes */
 108         jz              .Ldone_hash             /* zero blocks: nothing to do */
 109         add             DATA_PTR, NUM_BLKS      /* pointer to end of data */
 110 
 111         /*
 112          * load initial hash values
 113          * Need to reorder these appropriately
 114          * DCBA, HGFE -> ABEF, CDGH
 115          *
 116          * Offset DIGEST_PTR to account for the algorithm in the context.
 117          */
 118         addq            $8, DIGEST_PTR          /* kept until the final store */
 119         movdqu          0*16(DIGEST_PTR), STATE0        /* DCBA */
 120         movdqu          1*16(DIGEST_PTR), STATE1        /* HGFE */
 121 
 122         pshufd          $0xB1, STATE0,  STATE0          /* CDAB */
 123         pshufd          $0x1B, STATE1,  STATE1          /* EFGH */
 124         movdqa          STATE0, MSGTMP4
 125         palignr         $8, STATE1,  STATE0             /* ABEF */
 126         pblendw         $0xF0, MSGTMP4, STATE1          /* CDGH */
 127         /* Loop invariants: byte-flip mask and round-constant base */
 128         movdqa          PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
 129         lea             K256(%rip), SHA256CONSTANTS
 130         /* Each pass of .Lloop0 consumes one 64-byte block */
 131 .Lloop0:
 132         /* Save hash values for addition after rounds */
 133         movdqa          STATE0, ABEF_SAVE
 134         movdqa          STATE1, CDGH_SAVE
 135         /* Each paddd+sha256rnds2+pshufd+sha256rnds2 group runs 4 rounds */
 136         /* Rounds 0-3: W[0..3] read from the block and byte-swapped */
 137         movdqu          0*16(DATA_PTR), MSG
 138         pshufb          SHUF_MASK, MSG
 139         movdqa          MSG, MSGTMP0
 140                 paddd           0*16(SHA256CONSTANTS), MSG
 141                 sha256rnds2     STATE0, STATE1  /* rounds 0-1 */
 142                 pshufd          $0x0E, MSG, MSG /* high qword -> low */
 143                 sha256rnds2     STATE1, STATE0  /* rounds 2-3 */
 144 
 145         /* Rounds 4-7: W[4..7]; begin W[16..19] in MSGTMP0 */
 146         movdqu          1*16(DATA_PTR), MSG
 147         pshufb          SHUF_MASK, MSG
 148         movdqa          MSG, MSGTMP1
 149                 paddd           1*16(SHA256CONSTANTS), MSG
 150                 sha256rnds2     STATE0, STATE1
 151                 pshufd          $0x0E, MSG, MSG
 152                 sha256rnds2     STATE1, STATE0
 153         sha256msg1      MSGTMP1, MSGTMP0
 154 
 155         /* Rounds 8-11: W[8..11]; begin W[20..23] in MSGTMP1 */
 156         movdqu          2*16(DATA_PTR), MSG
 157         pshufb          SHUF_MASK, MSG
 158         movdqa          MSG, MSGTMP2
 159                 paddd           2*16(SHA256CONSTANTS), MSG
 160                 sha256rnds2     STATE0, STATE1
 161                 pshufd          $0x0E, MSG, MSG
 162                 sha256rnds2     STATE1, STATE0
 163         sha256msg1      MSGTMP2, MSGTMP1
 164 
 165         /* Rounds 12-15: W[12..15]; finish W[16..19], begin W[24..27] */
 166         movdqu          3*16(DATA_PTR), MSG
 167         pshufb          SHUF_MASK, MSG
 168         movdqa          MSG, MSGTMP3
 169                 paddd           3*16(SHA256CONSTANTS), MSG
 170                 sha256rnds2     STATE0, STATE1
 171         movdqa          MSGTMP3, MSGTMP4
 172         palignr         $4, MSGTMP2, MSGTMP4    /* W[9..12] */
 173         paddd           MSGTMP4, MSGTMP0        /* add the W[t-7] terms */
 174         sha256msg2      MSGTMP3, MSGTMP0        /* MSGTMP0 = W[16..19] */
 175                 pshufd          $0x0E, MSG, MSG
 176                 sha256rnds2     STATE1, STATE0
 177         sha256msg1      MSGTMP3, MSGTMP2
 178 
 179         /* Rounds 16-19: finish W[20..23], begin W[28..31] */
 180         movdqa          MSGTMP0, MSG
 181                 paddd           4*16(SHA256CONSTANTS), MSG
 182                 sha256rnds2     STATE0, STATE1
 183         movdqa          MSGTMP0, MSGTMP4
 184         palignr         $4, MSGTMP3, MSGTMP4
 185         paddd           MSGTMP4, MSGTMP1
 186         sha256msg2      MSGTMP0, MSGTMP1
 187                 pshufd          $0x0E, MSG, MSG
 188                 sha256rnds2     STATE1, STATE0
 189         sha256msg1      MSGTMP0, MSGTMP3
 190 
 191         /* Rounds 20-23: finish W[24..27], begin W[32..35] */
 192         movdqa          MSGTMP1, MSG
 193                 paddd           5*16(SHA256CONSTANTS), MSG
 194                 sha256rnds2     STATE0, STATE1
 195         movdqa          MSGTMP1, MSGTMP4
 196         palignr         $4, MSGTMP0, MSGTMP4
 197         paddd           MSGTMP4, MSGTMP2
 198         sha256msg2      MSGTMP1, MSGTMP2
 199                 pshufd          $0x0E, MSG, MSG
 200                 sha256rnds2     STATE1, STATE0
 201         sha256msg1      MSGTMP1, MSGTMP0
 202 
 203         /* Rounds 24-27: finish W[28..31], begin W[36..39] */
 204         movdqa          MSGTMP2, MSG
 205                 paddd           6*16(SHA256CONSTANTS), MSG
 206                 sha256rnds2     STATE0, STATE1
 207         movdqa          MSGTMP2, MSGTMP4
 208         palignr         $4, MSGTMP1, MSGTMP4
 209         paddd           MSGTMP4, MSGTMP3
 210         sha256msg2      MSGTMP2, MSGTMP3
 211                 pshufd          $0x0E, MSG, MSG
 212                 sha256rnds2     STATE1, STATE0
 213         sha256msg1      MSGTMP2, MSGTMP1
 214 
 215         /* Rounds 28-31: finish W[32..35], begin W[40..43] */
 216         movdqa          MSGTMP3, MSG
 217                 paddd           7*16(SHA256CONSTANTS), MSG
 218                 sha256rnds2     STATE0, STATE1
 219         movdqa          MSGTMP3, MSGTMP4
 220         palignr         $4, MSGTMP2, MSGTMP4
 221         paddd           MSGTMP4, MSGTMP0
 222         sha256msg2      MSGTMP3, MSGTMP0
 223                 pshufd          $0x0E, MSG, MSG
 224                 sha256rnds2     STATE1, STATE0
 225         sha256msg1      MSGTMP3, MSGTMP2
 226 
 227         /* Rounds 32-35: finish W[36..39], begin W[44..47] */
 228         movdqa          MSGTMP0, MSG
 229                 paddd           8*16(SHA256CONSTANTS), MSG
 230                 sha256rnds2     STATE0, STATE1
 231         movdqa          MSGTMP0, MSGTMP4
 232         palignr         $4, MSGTMP3, MSGTMP4
 233         paddd           MSGTMP4, MSGTMP1
 234         sha256msg2      MSGTMP0, MSGTMP1
 235                 pshufd          $0x0E, MSG, MSG
 236                 sha256rnds2     STATE1, STATE0
 237         sha256msg1      MSGTMP0, MSGTMP3
 238 
 239         /* Rounds 36-39: finish W[40..43], begin W[48..51] */
 240         movdqa          MSGTMP1, MSG
 241                 paddd           9*16(SHA256CONSTANTS), MSG
 242                 sha256rnds2     STATE0, STATE1
 243         movdqa          MSGTMP1, MSGTMP4
 244         palignr         $4, MSGTMP0, MSGTMP4
 245         paddd           MSGTMP4, MSGTMP2
 246         sha256msg2      MSGTMP1, MSGTMP2
 247                 pshufd          $0x0E, MSG, MSG
 248                 sha256rnds2     STATE1, STATE0
 249         sha256msg1      MSGTMP1, MSGTMP0
 250 
 251         /* Rounds 40-43: finish W[44..47], begin W[52..55] */
 252         movdqa          MSGTMP2, MSG
 253                 paddd           10*16(SHA256CONSTANTS), MSG
 254                 sha256rnds2     STATE0, STATE1
 255         movdqa          MSGTMP2, MSGTMP4
 256         palignr         $4, MSGTMP1, MSGTMP4
 257         paddd           MSGTMP4, MSGTMP3
 258         sha256msg2      MSGTMP2, MSGTMP3
 259                 pshufd          $0x0E, MSG, MSG
 260                 sha256rnds2     STATE1, STATE0
 261         sha256msg1      MSGTMP2, MSGTMP1
 262 
 263         /* Rounds 44-47: finish W[48..51], begin W[56..59] */
 264         movdqa          MSGTMP3, MSG
 265                 paddd           11*16(SHA256CONSTANTS), MSG
 266                 sha256rnds2     STATE0, STATE1
 267         movdqa          MSGTMP3, MSGTMP4
 268         palignr         $4, MSGTMP2, MSGTMP4
 269         paddd           MSGTMP4, MSGTMP0
 270         sha256msg2      MSGTMP3, MSGTMP0
 271                 pshufd          $0x0E, MSG, MSG
 272                 sha256rnds2     STATE1, STATE0
 273         sha256msg1      MSGTMP3, MSGTMP2
 274 
 275         /* Rounds 48-51: finish W[52..55], begin W[60..63] */
 276         movdqa          MSGTMP0, MSG
 277                 paddd           12*16(SHA256CONSTANTS), MSG
 278                 sha256rnds2     STATE0, STATE1
 279         movdqa          MSGTMP0, MSGTMP4
 280         palignr         $4, MSGTMP3, MSGTMP4
 281         paddd           MSGTMP4, MSGTMP1
 282         sha256msg2      MSGTMP0, MSGTMP1
 283                 pshufd          $0x0E, MSG, MSG
 284                 sha256rnds2     STATE1, STATE0
 285         sha256msg1      MSGTMP0, MSGTMP3
 286 
 287         /* Rounds 52-55: finish W[56..59]; no sha256msg1, W[64+] is unused */
 288         movdqa          MSGTMP1, MSG
 289                 paddd           13*16(SHA256CONSTANTS), MSG
 290                 sha256rnds2     STATE0, STATE1
 291         movdqa          MSGTMP1, MSGTMP4
 292         palignr         $4, MSGTMP0, MSGTMP4
 293         paddd           MSGTMP4, MSGTMP2
 294         sha256msg2      MSGTMP1, MSGTMP2
 295                 pshufd          $0x0E, MSG, MSG
 296                 sha256rnds2     STATE1, STATE0
 297 
 298         /* Rounds 56-59: finish W[60..63] */
 299         movdqa          MSGTMP2, MSG
 300                 paddd           14*16(SHA256CONSTANTS), MSG
 301                 sha256rnds2     STATE0, STATE1
 302         movdqa          MSGTMP2, MSGTMP4
 303         palignr         $4, MSGTMP1, MSGTMP4
 304         paddd           MSGTMP4, MSGTMP3
 305         sha256msg2      MSGTMP2, MSGTMP3
 306                 pshufd          $0x0E, MSG, MSG
 307                 sha256rnds2     STATE1, STATE0
 308 
 309         /* Rounds 60-63: message schedule is complete, rounds only */
 310         movdqa          MSGTMP3, MSG
 311                 paddd           15*16(SHA256CONSTANTS), MSG
 312                 sha256rnds2     STATE0, STATE1
 313                 pshufd          $0x0E, MSG, MSG
 314                 sha256rnds2     STATE1, STATE0
 315 
 316         /* Add current hash values with previously saved */
 317         paddd           ABEF_SAVE, STATE0
 318         paddd           CDGH_SAVE, STATE1
 319 
 320         /* Increment data pointer and loop if more to process */
 321         add             $64, DATA_PTR
 322         cmp             NUM_BLKS, DATA_PTR      /* NUM_BLKS = end of data */
 323         jne             .Lloop0
 324 
 325         /* Write hash values back in the correct order */
 326         pshufd          $0x1B, STATE0,  STATE0          /* FEBA */
 327         pshufd          $0xB1, STATE1,  STATE1          /* DCHG */
 328         movdqa          STATE0, MSGTMP4
 329         pblendw         $0xF0, STATE1,  STATE0          /* DCBA */
 330         palignr         $8, MSGTMP4, STATE1             /* HGFE */
 331         /* DIGEST_PTR still carries the +8 offset applied before the load */
 332         movdqu          STATE0, 0*16(DIGEST_PTR)
 333         movdqu          STATE1, 1*16(DIGEST_PTR)
 334 
 335 .Ldone_hash:
 336 
 337         ret
 338 SET_SIZE(SHA256TransformBlocks)
 339 
 340 .section        .rodata.cst256.K256, "aM", @progbits, 256
 341 .align 64       /* paddd reads these rows as aligned 16-byte memory operands */
 342 K256:           /* SHA-256 round constants K[0..63], four per 16-byte row */
 343         .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
 344         .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
 345         .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
 346         .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
 347         .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
 348         .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
 349         .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
 350         .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
 351         .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
 352         .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
 353         .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
 354         .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
 355         .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
 356         .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
 357         .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
 358         .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
 359 
 360 .section        .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
 361 .align 16       /* loaded with movdqa, which needs 16-byte alignment */
 362 PSHUFFLE_BYTE_FLIP_MASK:        /* pshufb mask: reverses bytes in each 32-bit word */
 363         .octa 0x0c0d0e0f08090a0b0405060700010203