// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "textflag.h"

// For details about how memory ordering is enforced on POWER and how
// C/C++-style memory models are mapped onto its barriers, see the paper
// below. It provides the context for why the unusual-looking instruction
// sequences in this file work.
//
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
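//
// In brief, the recurring instruction sequences below follow the standard
// POWER mappings:
//
//   - Acquire loads (Load*, LoadAcq*): the load is followed by a compare of
//     the loaded register with itself, a conditional branch that is never
//     taken, and ISYNC. The artificial compare-and-branch makes the ISYNC
//     dependent on the loaded value, so later memory accesses cannot be
//     performed before the load completes. The sequentially consistent
//     variants also place a full SYNC barrier before the load.
//
//   - Release stores (StoreRel*): LWSYNC (lightweight sync) before the
//     store; the sequentially consistent Store* variants use SYNC instead.
//
//   - Read-modify-write (Cas*, Xadd*, Xchg*, And*, Or*): LWAR/LDAR/LBAR
//     (load and reserve) paired with STWCCC/STDCCC/STBCCC (store
//     conditional). The store conditional sets CR0, and the BNE loops back
//     to retry if the reservation was lost.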

// uint32 ·Load(uint32 volatile* ptr)
TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3
	SYNC
	MOVWZ	0(R3), R3
	CMPW	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVW	R3, ret+8(FP)
	RET

// uint8 ·Load8(uint8 volatile* ptr)
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
	MOVD	ptr+0(FP), R3
	SYNC
	MOVBZ	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVB	R3, ret+8(FP)
	RET

// uint64 ·Load64(uint64 volatile* ptr)
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	SYNC
	MOVD	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

// void *·Loadp(void *volatile *ptr)
TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	SYNC
	MOVD	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

// uint32 ·LoadAcq(uint32 volatile* ptr)
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3
	MOVWZ	0(R3), R3
	CMPW	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC
	MOVW	R3, ret+8(FP)
	RET

// uint64 ·LoadAcq64(uint64 volatile* ptr)
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	MOVD	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

// bool cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC
cas_again:
	LWAR	(R3), R6
	CMPW	R6, R4
	BNE	cas_fail
	STWCCC	R5, (R3)
	BNE	cas_again
	MOVD	$1, R3
	LWSYNC
	MOVB	R3, ret+16(FP)
	RET
cas_fail:
	LWSYNC
	MOVB	R0, ret+16(FP)
	RET

// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVD	ptr+0(FP), R3
	MOVD	old+8(FP), R4
	MOVD	new+16(FP), R5
	LWSYNC
cas64_again:
	LDAR	(R3), R6
	CMP	R6, R4
	BNE	cas64_fail
	STDCCC	R5, (R3)
	BNE	cas64_again
	MOVD	$1, R3
	LWSYNC
	MOVB	R3, ret+24(FP)
	RET
cas64_fail:
	LWSYNC
	MOVB	R0, ret+24(FP)
	RET

TEXT ·CasRel(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC
cas_again:
	LWAR	(R3), $0, R6 // 0 = Mutex release hint
	CMPW	R6, R4
	BNE	cas_fail
	STWCCC	R5, (R3)
	BNE	cas_again
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET
cas_fail:
	MOVB	R0, ret+16(FP)
	RET

TEXT ·Casint32(SB), NOSPLIT, $0-17
	BR	·Cas(SB)

TEXT ·Casint64(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

TEXT ·Casuintptr(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)

TEXT ·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR	·LoadAcq64(SB)

TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)

TEXT ·Storeint32(SB), NOSPLIT, $0-12
	BR	·Store(SB)

TEXT ·Storeint64(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
	BR	·StoreRel64(SB)

TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
	BR	·Xadd64(SB)

TEXT ·Loadint32(SB), NOSPLIT, $0-12
	BR	·Load(SB)

TEXT ·Loadint64(SB), NOSPLIT, $0-16
	BR	·Load64(SB)

TEXT ·Xaddint32(SB), NOSPLIT, $0-20
	BR	·Xadd(SB)

TEXT ·Xaddint64(SB), NOSPLIT, $0-24
	BR	·Xadd64(SB)

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT ·Casp1(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

// uint32 xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	delta+8(FP), R5
	LWSYNC
	LWAR	(R4), R3
	ADD	R5, R3
	STWCCC	R3, (R4)
	BNE	-3(PC)
	MOVW	R3, ret+16(FP)
	RET

// uint64 Xadd64(uint64 volatile *val, int64 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	delta+8(FP), R5
	LWSYNC
	LDAR	(R4), R3
	ADD	R5, R3
	STDCCC	R3, (R4)
	BNE	-3(PC)
	MOVD	R3, ret+16(FP)
	RET

// uint32 Xchg(ptr *uint32, new uint32)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	new+8(FP), R5
	LWSYNC
	LWAR	(R4), R3
	STWCCC	R5, (R4)
	BNE	-2(PC)
	ISYNC
	MOVW	R3, ret+16(FP)
	RET

// uint64 Xchg64(ptr *uint64, new uint64)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	new+8(FP), R5
	LWSYNC
	LDAR	(R4), R3
	STDCCC	R5, (R4)
	BNE	-2(PC)
	ISYNC
	MOVD	R3, ret+16(FP)
	RET

TEXT ·Xchgint32(SB), NOSPLIT, $0-20
	BR	·Xchg(SB)

TEXT ·Xchgint64(SB), NOSPLIT, $0-24
	BR	·Xchg64(SB)

TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
	BR	·Xchg64(SB)

TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

TEXT ·Store(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	SYNC
	MOVW	R4, 0(R3)
	RET

TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVB	val+8(FP), R4
	SYNC
	MOVB	R4, 0(R3)
	RET

TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	SYNC
	MOVD	R4, 0(R3)
	RET

TEXT ·StoreRel(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
	MOVW	R4, 0(R3)
	RET

TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
	MOVD	R4, 0(R3)
	RET

// void ·Or8(byte volatile*, byte);
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6
	OR	R4, R6
	STBCCC	R6, (R3)
	BNE	again
	RET

// void ·And8(byte volatile*, byte);
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6
	AND	R4, R6
	STBCCC	R6, (R3)
	BNE	again
	RET

// func Or(addr *uint32, v uint32)
TEXT ·Or(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6
	OR	R4, R6
	STWCCC	R6, (R3)
	BNE	again
	RET

// func And(addr *uint32, v uint32)
TEXT ·And(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6
	AND	R4, R6
	STWCCC	R6, (R3)
	BNE	again
	RET

// func Or32(addr *uint32, v uint32) old uint32
TEXT ·Or32(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6
	OR	R4, R6, R7
	STWCCC	R7, (R3)
	BNE	again
	MOVW	R6, ret+16(FP)
	RET

// func And32(addr *uint32, v uint32) old uint32
TEXT ·And32(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6
	AND	R4, R6, R7
	STWCCC	R7, (R3)
	BNE	again
	MOVW	R6, ret+16(FP)
	RET

// func Or64(addr *uint64, v uint64) old uint64
TEXT ·Or64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
again:
	LDAR	(R3), R6
	OR	R4, R6, R7
	STDCCC	R7, (R3)
	BNE	again
	MOVD	R6, ret+16(FP)
	RET

// func And64(addr *uint64, v uint64) old uint64
TEXT ·And64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
again:
	LDAR	(R3), R6
	AND	R4, R6, R7
	STDCCC	R7, (R3)
	BNE	again
	MOVD	R6, ret+16(FP)
	RET

// func Anduintptr(addr *uintptr, v uintptr) old uintptr
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
	JMP	·And64(SB)

// func Oruintptr(addr *uintptr, v uintptr) old uintptr
TEXT ·Oruintptr(SB), NOSPLIT, $0-24
	JMP	·Or64(SB)
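
// For reference, a typical Go-level use of the primitives defined above,
// following the prototypes given in the comments in this file (incr is a
// hypothetical helper shown only for illustration):
//
//	func incr(p *uint32) uint32 {
//		for {
//			old := Load(p)
//			if Cas(p, old, old+1) {
//				return old + 1
//			}
//		}
//	}
//
// Xadd(p, 1) performs the same update, and returns the same new value, in a
// single call.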