Java、C 与多线程(标签:java, c, multithreading)

……一个指针变量。这通过消除索引指令来优化代码。

非常感谢。我在 C 中使用了这个方法,时间从约 5400 减少到约 5350。确实有一些改进。

你能解释一下你在做什么,以及这些指令来自哪里(编译器、库等等)吗?

@PaulOgilvie 用 AVX2 SIMD 重新实现了 OP 的代码。这些指令内置于 2013 年之后的 CPU 中,并且所有现代 C 和 C++ 编译器都支持。正如您所看到的,性能差异是巨大的。

非常感谢这段代码。我想我需要一份指南来理解它。你似乎非常精通 C 语言编程。

请注意,每次循环只需调用一次 rand(),这也会有很大帮助 :-)

@AmarjitSingh 添加了更多注释,希望现在更容易理解。
/**
 * Micro-benchmark: 100,000,000 times, advances a random-length prefix of the
 * alphabet by one letter ('z' wraps to 'a') using an if/else, then prints the
 * elapsed wall-clock time in milliseconds.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Primitive long/int throughout: boxed Long/Integer counters would be
    // unboxed and re-boxed on every one of the 100M iterations.
    long startTime = System.currentTimeMillis();
    String name = "abcdefghijklmnopqrstuvwxyz";
    char[] stringCharArray = name.toCharArray();
    Random random = new Random();
    for (int i = 0; i < 100000000; i++) {
        // Draw the prefix length (1..26) once per pass. Keeping the call in
        // the loop condition would re-draw a new bound on every comparison.
        int prefixLength = random.nextInt(26) + 1;
        for (int j = 0; j < prefixLength; j++) {
            if (stringCharArray[j] == 'z') {
                stringCharArray[j] = 'a';
            } else {
                stringCharArray[j] = (char) (stringCharArray[j] + 1);
            }
        }
    }
    long endTime = System.currentTimeMillis();
    System.out.println(endTime - startTime + " ms");
}
/**
 * Micro-benchmark: 100,000,000 times, advances a random-length prefix of the
 * alphabet by one letter ('z' wraps to 'a'), looking each successor up in a
 * HashMap, then prints the elapsed wall-clock time in milliseconds.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Typed map instead of a raw HashMap: no unchecked Object-to-char cast
    // is needed when reading values back.
    HashMap<Character, Character> hashMap = new HashMap<>();
    for (char c = 'a'; c < 'z'; c++) {
        hashMap.put(c, (char) (c + 1));
    }
    hashMap.put('z', 'a');

    // Primitive long/int: boxed counters would be unboxed and re-boxed on
    // every iteration (and on every array index).
    long startTime = System.currentTimeMillis();
    String name = "abcdefghijklmnopqrstuvwxyz";
    char[] stringCharArray = name.toCharArray();
    Random random = new Random();
    for (int i = 0; i < 100000000; i++) {
        // Draw the prefix length (1..26) once per pass. Keeping the call in
        // the loop condition would re-draw a new bound on every comparison.
        int prefixLength = random.nextInt(26) + 1;
        for (int j = 0; j < prefixLength; j++) {
            stringCharArray[j] = hashMap.get(stringCharArray[j]);
        }
    }
    long endTime = System.currentTimeMillis();
    System.out.println(endTime - startTime + " ms");
}
#include <stdio.h>
#include <time.h>
#include <zconf.h>
#include <stdlib.h>
/*
 * Micro-benchmark: 100,000,000 times, advances a random-length prefix of the
 * alphabet by one letter ('z' wraps to 'a'), then prints the CPU time used,
 * in milliseconds.
 */
int main() {
    clock_t start = clock();
    char name[] = "abcdefghijklmnopqrstuvwxyz";
    for (int i = 0; i < 100000000; i++) {
        /*
         * Draw the prefix length once per pass. Keeping rand() in the loop
         * condition would re-draw a new bound on every comparison.
         * NOTE: rand() % 25 yields 0..24, so the last two letters of the
         * 26-letter string are never touched.
         */
        const int prefix_length = rand() % 25;
        for (int j = 0; j < prefix_length; j++) {
            if (name[j] == 'z') {
                name[j] = 'a';
            } else {
                name[j] = (char) (name[j] + 1);
            }
        }
    }
    clock_t stop = clock();
    /*
     * clock() counts in units of 1/CLOCKS_PER_SEC seconds; dividing by a
     * hard-coded 1000 and labelling the result "sec" was wrong. Convert
     * explicitly and report milliseconds.
     */
    printf("time taken = %.0f ms \n", (double) (stop - start) * 1000.0 / CLOCKS_PER_SEC);
    return 0;
}
// Successor lookup table: nextChar[k] is the letter that follows ('a' + k); the last entry wraps 'z' back to 'a'.
char[] nextChar = {'b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','a'};
// Alternative 1: index the table by position j. This ignores the current content of stringCharArray[j],
// so it always stores the same value for a given j.
stringCharArray[j] = nextChar[j];//by index
// Alternative 2: index the table by the current letter's offset from 'a', so the slot advances by one
// letter each time this runs. Requires stringCharArray[j] to be in 'a'..'z'.
stringCharArray[j] = nextChar[stringCharArray[j]-'a'];// using ascii
// Micro-benchmark: 100,000,000 times, advances a random-length prefix of the
// alphabet by one letter using a successor lookup table, then prints the
// elapsed wall-clock time in milliseconds.
long startTime = System.currentTimeMillis();
// nextChar[k] is the letter that follows ('a' + k); 'z' wraps back to 'a'.
char[] nextChar = {'b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','a'};
String name = "abcdefghijklmnopqrstuvwxyz";
char[] stringCharArray = name.toCharArray();
Random random = new Random();
for (int i = 0; i < 100000000; i++) {
    // Draw the prefix length (1..26) once per pass. Keeping the call in the
    // loop condition would re-draw a new bound on every comparison.
    int prefixLength = random.nextInt(26) + 1;
    for (int j = 0; j < prefixLength; j++) {
        // Index by the current letter, not by position: nextChar[j] would write
        // the same constant on every pass, so the array would stop changing
        // after the first rotation.
        stringCharArray[j] = nextChar[stringCharArray[j] - 'a'];
    }
}
long endTime = System.currentTimeMillis();
System.out.println(endTime - startTime + " ms");
#include <immintrin.h>
/*
 * AVX2 micro-benchmark: keeps the 26-letter alphabet in one 32-byte register
 * and, 100,000,000 times, advances a random-length prefix of it by one letter
 * ('z' wraps to 'a') without any per-character loop. Prints the CPU time
 * used, in milliseconds.
 */
int main()
{
    /* Byte lane indices 0..31, compared against the random prefix length below. */
    const __m256i compareConstant = _mm256_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 );
    clock_t start = clock();
    char name[ 32 ] = "abcdefghijklmnopqrstuvwxyz";
    // __m256i is a C name for AVX register.
    // AVX registers are 32 bytes in size, so the 26 bytes string fits in just a single one.
    // The following line loads the string from memory to that register.
    __m256i n = _mm256_loadu_si256( ( const __m256i* )name );
    for( int i = 0; i < 100000000; i++ )
    {
        // Increment the letters, all 32 of them.
        // `_mm256_set1_epi8` creates a value with all 32 bytes set to the same value.
        // `_mm256_add_epi8` adds one set of 32 signed bytes to another set of 32 signed bytes.
        // It's not a loop, i.e. it is a single instruction operating on all lanes at once.
        __m256i n2 = _mm256_add_epi8( n, _mm256_set1_epi8( 1 ) );
        // Wrap any > 'z' letters back to 'a'
        // _mm256_cmpgt_epi8 compares one set of bytes to another set, for `>`.
        // When it's `>` the result byte is set to 0xFF, when it's `<=` the result byte is 0.
        // _mm256_blendv_epi8 combines bytes from 2 registers based on the bytes from the third one.
        // In this case, the third one is the result of the comparison.
        n2 = _mm256_blendv_epi8( n2, _mm256_set1_epi8( 'a' ), _mm256_cmpgt_epi8( n2, _mm256_set1_epi8( 'z' ) ) );
        // Combine incremented ones with old, using random number of first characters
        const int r = rand() % 25;
        // This sets all 32 bytes in rv to the random number r
        __m256i rv = _mm256_broadcastb_epi8( _mm_cvtsi32_si128( r ) );
        // Compares all bytes in rv with the constant value [0, 1, 2, 3, ...]
        // For bytes where r>cc, blendv_epi8 will select a byte from n2.
        // For bytes where r<=cc, n will not change because blendv_epi8 will select an old value.
        n = _mm256_blendv_epi8( n, n2, _mm256_cmpgt_epi8( rv, compareConstant ) );
    }
    clock_t stop = clock();
    // clock() counts in units of 1/CLOCKS_PER_SEC seconds; dividing by a hard-coded 1000
    // and labelling the result "sec" was wrong. Convert explicitly and report milliseconds.
    printf( "time taken = %.0f ms \n", ( double )( stop - start ) * 1000.0 / CLOCKS_PER_SEC );
    return 0;
}