/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <sean.m.gulley@intel.com>
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 * Copyright (c) 2018, Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * illumos uses this file under the terms of the BSD license.
 */

/*
 * Syntax: AT&T (source, destination), passed through the C preprocessor.
 * ABI:    arguments arrive in %rdi, %rsi, %rdx (System V AMD64 order).
 */

#include <sys/asm_linkage.h>

/* Integer register roles */
#define	DIGEST_PTR	%rdi	/* 1st arg */
#define	DATA_PTR	%rsi	/* 2nd arg */
#define	NUM_BLKS	%rdx	/* 3rd arg */

#define	SHA256CONSTANTS	%rax	/* base address of the K256 table */

/*
 * Vector register roles.
 *
 * MSG has to be %xmm0: sha256rnds2 reads its "message word + round
 * constant" input implicitly from %xmm0.
 */
#define	MSG		%xmm0
#define	STATE0		%xmm1
#define	STATE1		%xmm2
#define	MSGTMP0		%xmm3
#define	MSGTMP1		%xmm4
#define	MSGTMP2		%xmm5
#define	MSGTMP3		%xmm6
#define	MSGTMP4		%xmm7

#define	SHUF_MASK	%xmm8	/* byte-swap mask, loaded once */

#define	ABEF_SAVE	%xmm9	/* state at block entry (STATE0 copy) */
#define	CDGH_SAVE	%xmm10	/* state at block entry (STATE1 copy) */

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * The function takes a pointer to the hash context, a pointer to the
 * input data, and a number of 64 byte blocks to process.  Once all blocks
 * have been processed, the hash state stored in the context is updated with
 * the resulting hash value.  The function only processes complete blocks,
 * there is no functionality to store partial blocks.  All message padding
 * and hash value initialization must be done outside the update function.
 *
 * The indented lines in the loop are instructions related to rounds processing.
 * The non-indented lines are instructions related to the message schedule.
 *
 * void SHA256TransformBlocks(SHA256_CTX *digest, const void *data,
 *     uint32_t numBlocks);
 * digest : pointer to the context; the eight 32-bit hash words are read
 *          from and written back to byte offset 8 within it
 * data: pointer to input data (read with unaligned loads)
 * numBlocks: Number of 64 byte blocks to process
 *
 * NOTE(review): the prototype above is carried over from the original
 * comment (with the symbol name corrected to match ENTRY_NP below).  The
 * code consumes the block count as the full 64-bit %rdx, so the C
 * declaration should pass a 64-bit count or otherwise guarantee that the
 * upper 32 bits are zero -- confirm against the caller's prototype.
 *
 * Behaviour visible in this file:
 *  - numBlocks == 0 returns immediately without reading or writing the
 *    context.
 *  - Modifies %rax, %rdx, %rsi, %rdi, %xmm0-%xmm10 and the flags; uses no
 *    stack.
 *  - Uses SHA extension (sha256rnds2, sha256msg1, sha256msg2), SSSE3
 *    (pshufb, palignr) and SSE4.1 (pblendw) instructions.  No CPU feature
 *    check is made here, so the caller must only reach this routine on
 *    hardware that supports them.
 */

	.text
	.align 32
ENTRY_NP(SHA256TransformBlocks)

	shl	$6, NUM_BLKS		/* convert to bytes */
	jz	.Ldone_hash		/* nothing to do for zero blocks */
	add	DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 *
	 * Offset DIGEST_PTR to account for the algorithm in the context.
	 * DIGEST_PTR stays offset until the final store below.
	 */
	addq	$8, DIGEST_PTR
	movdqu	0*16(DIGEST_PTR), STATE0
	movdqu	1*16(DIGEST_PTR), STATE1

	pshufd	$0xB1, STATE0, STATE0		/* CDAB */
	pshufd	$0x1B, STATE1, STATE1		/* EFGH */
	movdqa	STATE0, MSGTMP4
	palignr	$8, STATE1, STATE0		/* ABEF */
	pblendw	$0xF0, MSGTMP4, STATE1		/* CDGH */

	movdqa	PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea	K256(%rip), SHA256CONSTANTS

	/*
	 * Per-block loop.  On entry to each iteration DATA_PTR addresses the
	 * next unprocessed 64 byte block and NUM_BLKS holds the address one
	 * past the last block.
	 */
.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa	STATE0, ABEF_SAVE
	movdqa	STATE1, CDGH_SAVE

	/*
	 * Rounds 0-3
	 *
	 * Each group of four rounds adds four round constants to four message
	 * words in MSG and issues sha256rnds2 twice.  sha256rnds2 consumes the
	 * low two dwords of MSG, so pshufd $0x0E moves the high two dwords
	 * down before the second issue.  pshufb byte-swaps each loaded dword.
	 */
	movdqu	0*16(DATA_PTR), MSG
	pshufb	SHUF_MASK, MSG
	movdqa	MSG, MSGTMP0
		paddd	0*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 4-7 */
	movdqu	1*16(DATA_PTR), MSG
	pshufb	SHUF_MASK, MSG
	movdqa	MSG, MSGTMP1
		paddd	1*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 8-11 */
	movdqu	2*16(DATA_PTR), MSG
	pshufb	SHUF_MASK, MSG
	movdqa	MSG, MSGTMP2
		paddd	2*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/*
	 * Rounds 12-15
	 *
	 * From here on the message schedule is interleaved with the rounds:
	 * MSGTMP4 is scratch for the palignr'd words that are added in before
	 * sha256msg2 completes the next four schedule words.
	 */
	movdqu	3*16(DATA_PTR), MSG
	pshufb	SHUF_MASK, MSG
	movdqa	MSG, MSGTMP3
		paddd	3*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP3, MSGTMP4
	palignr	$4, MSGTMP2, MSGTMP4
	paddd	MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 16-19 */
	movdqa	MSGTMP0, MSG
		paddd	4*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP0, MSGTMP4
	palignr	$4, MSGTMP3, MSGTMP4
	paddd	MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 20-23 */
	movdqa	MSGTMP1, MSG
		paddd	5*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP1, MSGTMP4
	palignr	$4, MSGTMP0, MSGTMP4
	paddd	MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 24-27 */
	movdqa	MSGTMP2, MSG
		paddd	6*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP2, MSGTMP4
	palignr	$4, MSGTMP1, MSGTMP4
	paddd	MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 28-31 */
	movdqa	MSGTMP3, MSG
		paddd	7*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP3, MSGTMP4
	palignr	$4, MSGTMP2, MSGTMP4
	paddd	MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 32-35 */
	movdqa	MSGTMP0, MSG
		paddd	8*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP0, MSGTMP4
	palignr	$4, MSGTMP3, MSGTMP4
	paddd	MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 36-39 */
	movdqa	MSGTMP1, MSG
		paddd	9*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP1, MSGTMP4
	palignr	$4, MSGTMP0, MSGTMP4
	paddd	MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 40-43 */
	movdqa	MSGTMP2, MSG
		paddd	10*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP2, MSGTMP4
	palignr	$4, MSGTMP1, MSGTMP4
	paddd	MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 44-47 */
	movdqa	MSGTMP3, MSG
		paddd	11*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP3, MSGTMP4
	palignr	$4, MSGTMP2, MSGTMP4
	paddd	MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 48-51 */
	movdqa	MSGTMP0, MSG
		paddd	12*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP0, MSGTMP4
	palignr	$4, MSGTMP3, MSGTMP4
	paddd	MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/*
	 * Rounds 52-55
	 *
	 * No sha256msg1 from this group onward: the remaining groups issue
	 * only sha256msg2 (52-59) or no schedule instructions at all (60-63).
	 */
	movdqa	MSGTMP1, MSG
		paddd	13*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP1, MSGTMP4
	palignr	$4, MSGTMP0, MSGTMP4
	paddd	MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 56-59 */
	movdqa	MSGTMP2, MSG
		paddd	14*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa	MSGTMP2, MSGTMP4
	palignr	$4, MSGTMP1, MSGTMP4
	paddd	MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 60-63 */
	movdqa	MSGTMP3, MSG
		paddd	15*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd	$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Add current hash values with previously saved */
	paddd	ABEF_SAVE, STATE0
	paddd	CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add	$64, DATA_PTR
	cmp	NUM_BLKS, DATA_PTR
	jne	.Lloop0

	/* Write hash values back in the correct order */
	pshufd	$0x1B, STATE0, STATE0		/* FEBA */
	pshufd	$0xB1, STATE1, STATE1		/* DCHG */
	movdqa	STATE0, MSGTMP4
	pblendw	$0xF0, STATE1, STATE0		/* DCBA */
	palignr	$8, MSGTMP4, STATE1		/* HGFE */

	/* DIGEST_PTR still carries the +8 offset applied before the loop */
	movdqu	STATE0, 0*16(DIGEST_PTR)
	movdqu	STATE1, 1*16(DIGEST_PTR)

.Ldone_hash:

	ret
	SET_SIZE(SHA256TransformBlocks)

/*
 * SHA-256 round constants K[0..63], four per row so that row N is the
 * 16 byte operand at N*16(SHA256CONSTANTS) used by rounds 4N..4N+3.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/*
 * pshufb control that reverses the byte order within each 32-bit dword.
 * Loaded with movdqa, hence the 16 byte alignment.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa	0x0c0d0e0f08090a0b0405060700010203