/*
 * Intel SHA Extensions optimized implementation of a SHA-1 update function
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <sean.m.gulley@intel.com>
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 * Copyright (c) 2018, Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * illumos uses this file under the terms of the BSD license.
 */

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last),
 * run through the C preprocessor for the #define names below.
 */

#include <sys/asm_linkage.h>

/*
 * Integer argument registers.  NUM_BLKS is a block count only on entry:
 * the function converts it in place to a byte count and then to a pointer
 * one past the last input byte, which serves as the loop bound.
 */
#define DIGEST_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/* Holds the caller's %rsp while the stack pointer is realigned. */
#define RSPSAVE		%rax

/* gcc conversion */
#define FRAME_SIZE	32	/* space for 2x16 bytes */

/*
 * Vector register roles.  ABCD carries four of the five state words; the
 * fifth (E) lives in whichever of E0/E1 is current.  MSG0-MSG3 hold the
 * sixteen 32-bit message words of the block and are recycled as the
 * message schedule advances.
 */
#define ABCD		%xmm0
#define E0		%xmm1	/* Need two E's b/c they ping pong */
#define E1		%xmm2
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define SHUF_MASK	%xmm7	/* pshufb control, loaded once per call */


/*
 * Intel SHA Extensions optimized implementation of a SHA-1 update function
 *
 * The function takes a pointer to the current hash values, a pointer to the
 * input data, and a number of 64 byte blocks to process.  Once all blocks
 * have been processed, the memory the digest pointer refers to is updated
 * with the resulting hash value.  If the block count is zero the digest is
 * left untouched.  The function only processes complete blocks, there is no
 * functionality to store partial blocks.  All message padding and hash value
 * initialization must be done outside the update function.
 *
 * The indented lines in the loop are instructions related to rounds
 * processing.  The non-indented lines are instructions related to the
 * message schedule.
 *
 * void sha1_block_data_order(uint32_t *digest, const void *data,
 *     uint32_t numBlocks)
 *	digest	  : pointer to digest (five 32-bit words; 20 bytes are
 *		    read and written)
 *	data	  : pointer to input data (read with unaligned loads)
 *	numBlocks : Number of blocks to process
 *
 * NOTE(review): the prototype above says uint32_t, but the code shifts and
 * adds the full 64-bit NUM_BLKS register.  That is only safe if the caller
 * passes a 64-bit count or a zero-extended one; confirm against the C
 * declaration.
 *
 * Registers written: %rax, %rdx, %rsi, %xmm0-%xmm7, flags.
 * Stack: FRAME_SIZE bytes (plus alignment padding) below the entry %rsp
 * hold a copy of the incoming state for each block; they are not cleared
 * before returning.
 * CPU support for the SHA extensions is not checked here.
 */
.text
.align 32
ENTRY_NP(sha1_block_data_order)
	/*
	 * Build a 16-byte aligned scratch frame: the movdqa stores below
	 * require an aligned address.  The original %rsp is kept in RSPSAVE
	 * because the masking makes the adjustment non-constant.
	 */
	mov		%rsp, RSPSAVE
	sub		$FRAME_SIZE, %rsp
	and		$~0xF, %rsp

	shl		$6, NUM_BLKS		/* convert to bytes */
	jz		.Ldone_hash		/* nothing to hash */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 *
	 * The fifth state word (byte offset 16) is placed in the top lane of
	 * E0 and the other lanes are cleared with UPPER_WORD_MASK.  The first
	 * four words are loaded into ABCD and their order is reversed
	 * (pshufd 0x1B), so digest[0] ends up in the highest lane.
	 */
	pinsrd		$3, 1*16(DIGEST_PTR), E0
	movdqu		0*16(DIGEST_PTR), ABCD
	pand		UPPER_WORD_MASK(%rip), E0
	pshufd		$0x1B, ABCD, ABCD

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK

	/*
	 * One iteration per 64-byte block.  Each group of four rounds copies
	 * ABCD into the idle E register before running sha1rnds4; the next
	 * group's sha1nexte turns that copy into the E input for its rounds
	 * and adds the message words.  This is why E0 and E1 alternate.
	 */
.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		E0, (0*16)(%rsp)
	movdqa		ABCD, (1*16)(%rsp)

	/*
	 * Rounds 0-3
	 *
	 * Each 16-byte chunk of input is byte-reversed with SHUF_MASK before
	 * use.  E0 already holds E in its top lane, so a plain paddd stands in
	 * for sha1nexte here.  The sha1rnds4 immediate selects the round
	 * function and constant: 0 for rounds 0-19, 1 for 20-39, 2 for 40-59
	 * and 3 for 60-79.
	 */
	movdqu		0*16(DATA_PTR), MSG0
	pshufb		SHUF_MASK, MSG0
		paddd		MSG0, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD

	/* Rounds 4-7 */
	movdqu		1*16(DATA_PTR), MSG1
	pshufb		SHUF_MASK, MSG1
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG1, MSG0

	/* Rounds 8-11 */
	movdqu		2*16(DATA_PTR), MSG2
	pshufb		SHUF_MASK, MSG2
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/*
	 * Rounds 12-15
	 *
	 * From here on the sha1msg1 / pxor / sha1msg2 sequence, spread over
	 * three consecutive groups, produces the next four schedule words in
	 * the MSG register that was consumed longest ago.
	 */
	movdqu		3*16(DATA_PTR), MSG3
	pshufb		SHUF_MASK, MSG3
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 16-19 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 20-23 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 24-27 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 28-31 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 32-35 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 36-39 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 40-43 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 44-47 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 48-51 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 52-55 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 56-59 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 60-63 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$3, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 64-67 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$3, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/*
	 * Rounds 68-71
	 *
	 * The message schedule winds down from here: no further sha1msg1 is
	 * issued, and the last two groups need progressively fewer schedule
	 * instructions.
	 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$3, E1, ABCD
	pxor		MSG1, MSG3

	/* Rounds 72-75 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$3, E0, ABCD

	/* Rounds 76-79 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
		sha1rnds4	$3, E1, ABCD

	/*
	 * Add current hash values with previously saved
	 *
	 * E0 holds the ABCD copy taken before rounds 76-79; sha1nexte derives
	 * the final E from it and adds the E saved at the top of the loop.
	 */
	sha1nexte	(0*16)(%rsp), E0
	paddd		(1*16)(%rsp), ABCD

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/*
	 * Write hash values back in the correct order
	 *
	 * Undo the word reversal applied on load, then store the first four
	 * words followed by E from the top lane of E0.
	 */
	pshufd		$0x1B, ABCD, ABCD
	movdqu		ABCD, 0*16(DIGEST_PTR)
	pextrd		$3, E0, 1*16(DIGEST_PTR)

.Ldone_hash:
	/* Drop the aligned scratch frame by restoring the entry %rsp. */
	mov		RSPSAVE, %rsp

	ret
	SET_SIZE(sha1_block_data_order)

/*
 * pshufb control vector.  As stored in memory the bytes run 0x0f, 0x0e, ...,
 * 0x00, so applying it reverses all sixteen bytes of a register.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x000102030405060708090a0b0c0d0e0f

/* AND mask that keeps only the top 32-bit lane (bits 127:96). */
.section	.rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
.align 16
UPPER_WORD_MASK:
	.octa 0xFFFFFFFF000000000000000000000000