ความคิดเห็นที่ 53

ผม optimize ต่อจากเดิมอีกหน่อย 1. เพิ่มการบวกทีละ 2-6 ในกรณีที่แน่ใจว่าจะไม่ทำให้ข้ามค่า a ที่เราต้องการแน่ๆ เช่น สมมติว่าเศษของ a*pi/e ตัวปัจจุบัน (ผมใช้ตัวแปรชื่อ bn_x) มีค่าประมาณ 0.2 ถ้าเพิ่ม a ไปอีก 1 เศษก็เพิ่มแค่ทีละ 0.1557 แต่เราสามารถข้ามไปเลยคือเพิ่ม a ขึ้น 5 เลย เศษก็เพิ่มขึ้นเป็น 0.2 + 5*0.1557 หรือประมาณ 0.98 ซึ่งถ้าเพิ่ม a น้อยกว่านี้เศษก็จะไม่ใกล้จำนวนเต็มไปมากกว่านี้ 2. อีกอย่างคือทำเป็น 2 thread ด้วยครับ ใช้ Core2 Duo ให้เป็นประโยชน์
จะลด precision ลงก็ได้นะครับ 512-bit มันมากไป ถ้าเอาซัก 128-bit ก็ละเอียดประมาณ 38 หลักในฐานสิบ น่าจะเพียงพอสำหรับตัวเลขที่จะเอามาคูณ pi กับ e ที่น้อยกว่า 10^15 เหลือสำหรับเศษอีก 23 หลักไม่น่ามีปัญหา
compile ด้วยคำสั่งนี้ c:\util\MinGW\bin\g++ pie.cc -lgmpxx -lgmp -O3 -mthreads
โปรแกรมยาวมากๆ
========= pie.cc ========== #if !defined(_MT) #error requires a multithreaded C run-time library. #endif #define STRICT 1 #define WIN32_LEAN_AND_MEAN #include <windows.h> #include <process.h> #include <stdio.h> #include <conio.h> #include <time.h> #include <limits.h> #include <string> #include <list> #include <gmpxx.h> using namespace std; const char s_pi[] = "3." "14159265358979323846264338327950288419716939937510" "58209749445923078164062862089986280348253421170679" "82148086513282306647093844609550582231725359408128" "48111745028410270193852110555964462294895493038196" "44288109756659334461284756482337867831652712019091" "45648566923460348610454326648213393607260249141273"; const char s_e[] = "2." "71828182845904523536028747135266249775724709369995" "95749669676277240766303535475945713821785251664274" "27466391932003059921817413596629043572900334295260" "59563073813232862794349076323382988075319525101901" "15738341879307021540891499348841675092447614606680" "82264800168477411853742345442437107539077744992069"; //const char s_pi_e[] = "1." // "15572734979092171791009318331269629912085102316441" // "58204997065353272886318409169394401884342356735588" // "04486653687020700914219048007856360834437788613604" // "54410964513821969955576062268889509563770805852198" // "63512180585370723135556592133335409580733130728322" // "58994713597286225874168717495150020490242380114361"; const unsigned int myprecision = 512; const unsigned int bignum_size = myprecision/(sizeof(mp_limb_t)*8); typedef mp_limb_t bignum[bignum_size]; mpf_class f_pi(s_pi,myprecision), f_e(s_e,myprecision); mpf_class f_pi_e(f_pi/f_e,myprecision); list<mpz_class> list_good_a; time_t time_start; mpz_class next_free_a = 0; char num_to_add[0x10000]; bignum bn_0; bignum bn_fracx[7]; bool stop = false; HANDLE hmutex_list_good_a = NULL; HANDLE hmutex_next_free_a = NULL; HANDLE hmutex_print = NULL; mp_limb_t *zerobignum(mp_limb_t *dest) { for (int i=0; i<bignum_size; ++i) dest[ i ] = 0; return dest; } mp_limb_t *copybignum(mp_limb_t *dest, const mp_limb_t *src) { for (int i=0; i<bignum_size; ++i) dest[ i ] = src[ i ]; return dest; } mp_limb_t *frac2bn(mp_limb_t *dest, mpf_t src) { mp_limb_t *p; int icp,ize; int iend = src->_mp_size - src->_mp_exp; if (src->_mp_exp >= 0) ize = bignum_size; else ize = bignum_size+src->_mp_exp; int ibgn = iend - bignum_size; if (ibgn >= 0) { p = src->_mp_d+ibgn; icp = 0; } else { p = src->_mp_d; icp = -ibgn; } int i=0; for (; i<icp; ++i) dest[ i ] = 0; for (; i<ize; ++i) dest[ i ] = *(p++); for (; i<bignum_size; ++i) dest[ i ] = 0; return dest; } void printbn(bignum b, int n=bignum_size) { for (int d=bignum_size; d>0 && n>0; --n) printf("%08x",b[--d]); } mpf_class diff_pi_e_a(const mpz_class &a) { mpf_class b = a*f_pi_e; b -= trunc(b); if (b>0.5) return 1 - b; else return b; } int cmp_pi_e_a(const mpz_class &a1, const mpz_class &a2) { return cmp(diff_pi_e_a(a1),diff_pi_e_a(a2)); } void add_good_a(const mpz_class &a) { //lock (multithread) WaitForSingleObject(hmutex_list_good_a, INFINITE); mpf_class d = diff_pi_e_a(a); list<mpz_class>::iterator it = list_good_a.begin(); while (it != list_good_a.end() && *it > a) if (diff_pi_e_a(*it) >= d) { WaitForSingleObject(hmutex_print, INFINITE); gmp_printf("%14Zd erased.\n", (*it).get_mpz_t());
ReleaseMutex(hmutex_print); it = list_good_a.erase(it); } else { ++it; } if (it == list_good_a.end() || d < diff_pi_e_a(*it)) { list_good_a.insert(it,a); mpf_class b = a*f_pi_e; if (b-trunc(b) < 0.5) b = trunc(b); else b = trunc(b)+1; mpf_class dif = a*f_pi - b*f_e; WaitForSingleObject(hmutex_print, INFINITE); gmp_printf("%14Zd pi - %14.Ff e, diff=% 15.9Fg, time=%.fs\n", a.get_mpz_t(),b.get_mpf_t(),dif.get_mpf_t(), difftime(time(NULL),time_start) ); ReleaseMutex(hmutex_print); } //unlock (multithread) ReleaseMutex(hmutex_list_good_a); } mpz_class & get_good_a(mpz_class &a) { WaitForSingleObject(hmutex_list_good_a, INFINITE); a = list_good_a.front(); ReleaseMutex(hmutex_list_good_a); return a; } __stdcall unsigned threadfunc(void *arg) { (arg); bignum bn_x; zerobignum(bn_x); bignum bn_y; zerobignum(bn_y); bignum bn_minp; zerobignum(bn_minp); bn_minp[bignum_size-1] = INT_MAX; bignum bn_minn; zerobignum(bn_minn); bn_minn[bignum_size-1] = INT_MIN; mpz_class a, a_end, ga; mpf_class b(0,myprecision); while (!stop) { //lock (multithread) WaitForSingleObject(hmutex_list_good_a, INFINITE); a = next_free_a; next_free_a = a_end = (a + 0x800000); //unlock (multithread) ReleaseMutex(hmutex_list_good_a); //prepare b = a*f_pi_e; frac2bn(bn_x, b.get_mpf_t()); // real work loop for (; a<a_end; ) { bool sg = (int)(bn_x[bignum_size-1])<0; //gmp_printf("a=%.Zd sg=%d\n",a.get_mpz_t(),sg); //printf("bn_x =."); printbn(bn_x,4); printf("\n"); if ((sg ? mpn_cmp(bn_minn, bn_x, bignum_size) : mpn_cmp(bn_x, bn_minp, bignum_size) ) <0 ) { add_good_a(a); get_good_a(ga); b = ga*f_pi_e; frac2bn(bn_minp, b.get_mpf_t()); if ((int)(bn_minp[bignum_size-1]) < 0) mpn_sub_n(bn_minp, bn_0, bn_minp, bignum_size); mpn_sub_n(bn_minn, bn_0, bn_minp, bignum_size); //gmp_printf("g=%.Zd\nb=%.8Ff\n",ga.get_mpz_t(),b.get_mpf_t()); //printf("bn_minp=."); printbn(bn_minp,4); printf("\n"); //printf("bn_minn=."); printbn(bn_minn,4); printf("\n"); //if (getch() == 27) { stop = true; break; } } int add = num_to_add[((unsigned)(bn_x[bignum_size-1]))>>(GMP_LIMB_BITS-16)]; mpn_add_n(bn_x, bn_x, bn_fracx[add], bignum_size); a += add; } if (WaitForSingleObject(hmutex_print, 0)==WAIT_OBJECT_0) { gmp_printf("a=%Zd, time=%.fs\r",a.get_mpz_t(),difftime(time(NULL),time_start)); ReleaseMutex(hmutex_print); } if (kbhit()) { if (getch() == 27) stop = true; } } return 0; } int main (int argc, char *argv[]) { time(&time_start); mpf_set_default_prec(myprecision); // prepare zerobignum(bn_0); zerobignum(bn_fracx[0]); frac2bn(bn_fracx[1], f_pi_e.get_mpf_t()); { //mp_exp_t ex; //string s_frac = f_pi_e.get_str(ex,16,myprecision/4); //gmp_printf("%.8Ff\n%d %s\n",f_pi_e.get_mpf_t(),ex,s_frac.c_str()); //printf("bn_frac=."); printbn(bn_fracx[1],4); printf("\n"); for (int x=2; x<7; ++x) mpn_add_n(bn_fracx[x], bn_fracx[1], bn_fracx[x-1], bignum_size); for (int i=0,x=6; i<0x10000; ++i) { mp_limb_t tmp[1]; while (x>1 && mpn_add_1(tmp,bn_fracx[x]+(bignum_size-1),1,i<<(GMP_LIMB_BITS-16))!=0) --x; num_to_add[ i ] = x; } } // prepare sync printf("press ESC to stop.\n"); next_free_a = 1; hmutex_list_good_a = CreateMutex(NULL,false,NULL); hmutex_next_free_a = CreateMutex(NULL,false,NULL); hmutex_print = CreateMutex(NULL,false,NULL); // start unsigned idthr1; HANDLE hthr1; hthr1 = (HANDLE)_beginthreadex(NULL, 0, threadfunc, NULL, 0, &idthr1); if (hthr1 == 0) printf("Failed to create thread.\n"); Sleep(1000); threadfunc(NULL); // stop stop = true; if (hthr1) { WaitForSingleObject(hthr1, INFINITE); CloseHandle(hthr1); hthr1 = NULL; } // free resource if (hmutex_list_good_a) { CloseHandle(hmutex_list_good_a); hmutex_list_good_a = NULL; } if (hmutex_next_free_a) { CloseHandle(hmutex_next_free_a); hmutex_next_free_a = NULL; } if (hmutex_print ) { CloseHandle(hmutex_print ); hmutex_print = NULL; } // conclude printf("\n"); return 0; }
====================
จากคุณ :
ผลึกความคิด
- [
18 ม.ค. 52 22:43:04
]
|
|
|