mirror of
https://github.com/eddyem/eddys_snippets.git
synced 2026-03-22 01:31:16 +03:00
add two avx instructions example & simple clear screen
This commit is contained in:
32
avx/add.c
Normal file
32
avx/add.c
Normal file
@@ -0,0 +1,32 @@
|
||||
/* Construct a 256-bit vector from 4 64-bit doubles. Add it to itself
|
||||
* and print the result.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
int main() {
|
||||
|
||||
__m256i hello;
|
||||
// Construction from scalars or literals.
|
||||
__m256d a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
|
||||
|
||||
// Does GCC generate the correct mov, or (better yet) elide the copy
|
||||
// and pass two of the same register into the add? Let's look at the assembly.
|
||||
__m256d b = _mm256_set_pd(0.0, 0.0, 0.0, 0.0), c;
|
||||
for(int i = 0; i < 1000000000; ++i){
|
||||
// Add the two vectors, interpreting the bits as 4 double-precision
|
||||
// floats.
|
||||
c = _mm256_add_pd(a, b);
|
||||
b = c;
|
||||
|
||||
}
|
||||
|
||||
// Do we ever touch DRAM or will these four be registers?
|
||||
__attribute__ ((aligned (32))) double output[4];
|
||||
_mm256_store_pd(output, c);
|
||||
|
||||
printf("%f %f %f %f\n",
|
||||
output[0], output[1], output[2], output[3]);
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user