pcg_extras.hpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. /*
  2. * PCG Random Number Generation for C++
  3. *
  4. * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org>
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. *
  18. * For additional information about the PCG random number generation scheme,
  19. * including its license and other licensing options, visit
  20. *
  21. * http://www.pcg-random.org
  22. */
  23. /*
  24. * This file provides support code that is useful for random-number generation
  25. * but not specific to the PCG generation scheme, including:
  26. * - 128-bit int support for platforms where it isn't available natively
  27. * - bit twiddling operations
  28. * - I/O of 128-bit and 8-bit integers
  29. * - Handling the evilness of SeedSeq
  30. * - Support for efficiently producing random numbers less than a given
  31. * bound
  32. */
  33. #ifndef PCG_EXTRAS_HPP_INCLUDED
  34. #define PCG_EXTRAS_HPP_INCLUDED 1
  35. #include <cinttypes>
  36. #include <cstddef>
  37. #include <cstdlib>
  38. #include <cstring>
  39. #include <cassert>
  40. #include <limits>
  41. #include <iostream>
  42. #include <type_traits>
  43. #include <utility>
  44. #include <locale>
  45. #include <iterator>
  46. #include <utility>
  47. #ifdef __GNUC__
  48. #include <cxxabi.h>
  49. #endif
  50. /*
  51. * Abstractions for compiler-specific directives
  52. */
  53. #ifdef __GNUC__
  54. #define PCG_NOINLINE __attribute__((noinline))
  55. #else
  56. #define PCG_NOINLINE
  57. #endif
  58. /*
  59. * Some members of the PCG library use 128-bit math. When compiling on 64-bit
  60. * platforms, both GCC and Clang provide 128-bit integer types that are ideal
  61. * for the job.
  62. *
  63. * On 32-bit platforms (or with other compilers), we fall back to a C++
  64. * class that provides 128-bit unsigned integers instead. It may seem
  65. * like we're reinventing the wheel here, because libraries already exist
  66. * that support large integers, but most existing libraries provide a very
  67. * generic multiprecision code, but here we're operating at a fixed size.
  68. * Also, most other libraries are fairly heavyweight. So we use a direct
  69. * implementation. Sadly, it's much slower than hand-coded assembly or
  70. * direct CPU support.
  71. *
  72. */
  73. #if __SIZEOF_INT128__
  74. namespace pcg_extras {
  75. typedef __uint128_t pcg128_t;
  76. }
  77. #define PCG_128BIT_CONSTANT(high,low) \
  78. ((pcg128_t(high) << 64) + low)
  79. #else
  80. #include "pcg_uint128.hpp"
  81. namespace pcg_extras {
  82. typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
  83. }
  84. #define PCG_128BIT_CONSTANT(high,low) \
  85. pcg128_t(high,low)
  86. #define PCG_EMULATED_128BIT_MATH 1
  87. #endif
  88. namespace pcg_extras {
  89. /*
  90. * We often need to represent a "number of bits". When used normally, these
  91. * numbers are never greater than 128, so an unsigned char is plenty.
  92. * If you're using a nonstandard generator of a larger size, you can set
  93. * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers
  94. * might produce faster code if you set it to an unsigned int.)
  95. */
  96. #ifndef PCG_BITCOUNT_T
  97. typedef uint8_t bitcount_t;
  98. #else
  99. typedef PCG_BITCOUNT_T bitcount_t;
  100. #endif
  101. /*
  102. * C++ requires us to be able to serialize RNG state by printing or reading
  103. * it from a stream. Because we use 128-bit ints, we also need to be able
  104. * ot print them, so here is code to do so.
  105. *
  106. * This code provides enough functionality to print 128-bit ints in decimal
  107. * and zero-padded in hex. It's not a full-featured implementation.
  108. */
  109. template <typename CharT, typename Traits>
  110. std::basic_ostream<CharT,Traits>&
  111. operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
  112. {
  113. auto desired_base = out.flags() & out.basefield;
  114. bool want_hex = desired_base == out.hex;
  115. if (want_hex) {
  116. uint64_t highpart = uint64_t(value >> 64);
  117. uint64_t lowpart = uint64_t(value);
  118. auto desired_width = out.width();
  119. if (desired_width > 16) {
  120. out.width(desired_width - 16);
  121. }
  122. if (highpart != 0 || desired_width > 16)
  123. out << highpart;
  124. CharT oldfill;
  125. if (highpart != 0) {
  126. out.width(16);
  127. oldfill = out.fill('0');
  128. }
  129. auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
  130. out << lowpart;
  131. out.setf(oldflags);
  132. if (highpart != 0) {
  133. out.fill(oldfill);
  134. }
  135. return out;
  136. }
  137. constexpr size_t MAX_CHARS_128BIT = 40;
  138. char buffer[MAX_CHARS_128BIT];
  139. char* pos = buffer+sizeof(buffer);
  140. *(--pos) = '\0';
  141. constexpr auto BASE = pcg128_t(10ULL);
  142. do {
  143. auto div = value / BASE;
  144. auto mod = uint32_t(value - (div * BASE));
  145. *(--pos) = '0' + mod;
  146. value = div;
  147. } while(value != pcg128_t(0ULL));
  148. return out << pos;
  149. }
  150. template <typename CharT, typename Traits>
  151. std::basic_istream<CharT,Traits>&
  152. operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
  153. {
  154. typename std::basic_istream<CharT,Traits>::sentry s(in);
  155. if (!s)
  156. return in;
  157. constexpr auto BASE = pcg128_t(10ULL);
  158. pcg128_t current(0ULL);
  159. bool did_nothing = true;
  160. bool overflow = false;
  161. for(;;) {
  162. CharT wide_ch = in.get();
  163. if (!in.good())
  164. break;
  165. auto ch = in.narrow(wide_ch, '\0');
  166. if (ch < '0' || ch > '9') {
  167. in.unget();
  168. break;
  169. }
  170. did_nothing = false;
  171. pcg128_t digit(uint32_t(ch - '0'));
  172. pcg128_t timesbase = current*BASE;
  173. overflow = overflow || timesbase < current;
  174. current = timesbase + digit;
  175. overflow = overflow || current < digit;
  176. }
  177. if (did_nothing || overflow) {
  178. in.setstate(std::ios::failbit);
  179. if (overflow)
  180. current = ~pcg128_t(0ULL);
  181. }
  182. value = current;
  183. return in;
  184. }
  185. /*
  186. * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
  187. * If we just used the provided IO operators, they'd read/write chars,
  188. * not ints, so we need to define our own. We *can* redefine this operator
  189. * here because we're in our own namespace.
  190. */
  191. template <typename CharT, typename Traits>
  192. std::basic_ostream<CharT,Traits>&
  193. operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
  194. {
  195. return out << uint32_t(value);
  196. }
  197. template <typename CharT, typename Traits>
  198. std::basic_istream<CharT,Traits>&
  199. operator>>(std::basic_istream<CharT,Traits>& in, uint8_t target)
  200. {
  201. uint32_t value = 0xdecea5edU;
  202. in >> value;
  203. if (!in && value == 0xdecea5edU)
  204. return in;
  205. if (value > uint8_t(~0)) {
  206. in.setstate(std::ios::failbit);
  207. value = ~0U;
  208. }
  209. target = uint8_t(value);
  210. return in;
  211. }
  212. /* Unfortunately, the above functions don't get found in preference to the
  213. * built in ones, so we create some more specific overloads that will.
  214. * Ugh.
  215. */
  216. inline std::ostream& operator<<(std::ostream& out, uint8_t value)
  217. {
  218. return pcg_extras::operator<< <char>(out, value);
  219. }
  220. inline std::istream& operator>>(std::istream& in, uint8_t& value)
  221. {
  222. return pcg_extras::operator>> <char>(in, value);
  223. }
  224. /*
  225. * Useful bitwise operations.
  226. */
  227. /*
  228. * XorShifts are invertable, but they are someting of a pain to invert.
  229. * This function backs them out. It's used by the whacky "inside out"
  230. * generator defined later.
  231. */
  232. template <typename itype>
  233. inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
  234. {
  235. if (2*shift >= bits) {
  236. return x ^ (x >> shift);
  237. }
  238. itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1;
  239. itype highmask1 = ~lowmask1;
  240. itype top1 = x;
  241. itype bottom1 = x & lowmask1;
  242. top1 ^= top1 >> shift;
  243. top1 &= highmask1;
  244. x = top1 | bottom1;
  245. itype lowmask2 = (itype(1U) << (bits - shift)) - 1;
  246. itype bottom2 = x & lowmask2;
  247. bottom2 = unxorshift(bottom2, bits - shift, shift);
  248. bottom2 &= lowmask1;
  249. return top1 | bottom2;
  250. }
  251. /*
  252. * Rotate left and right.
  253. *
  254. * In ideal world, compilers would spot idiomatic rotate code and convert it
  255. * to a rotate instruction. Of course, opinions vary on what the correct
  256. * idiom is and how to spot it. For clang, sometimes it generates better
  257. * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
  258. */
  259. template <typename itype>
  260. inline itype rotl(itype value, bitcount_t rot)
  261. {
  262. constexpr bitcount_t bits = sizeof(itype) * 8;
  263. constexpr bitcount_t mask = bits - 1;
  264. #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
  265. return rot ? (value << rot) | (value >> (bits - rot)) : value;
  266. #else
  267. return (value << rot) | (value >> ((- rot) & mask));
  268. #endif
  269. }
  270. template <typename itype>
  271. inline itype rotr(itype value, bitcount_t rot)
  272. {
  273. constexpr bitcount_t bits = sizeof(itype) * 8;
  274. constexpr bitcount_t mask = bits - 1;
  275. #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
  276. return rot ? (value >> rot) | (value << (bits - rot)) : value;
  277. #else
  278. return (value >> rot) | (value << ((- rot) & mask));
  279. #endif
  280. }
  281. /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
  282. * to properly recognizing idiomatic rotate code, so for we also provide
  283. * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss.
  284. * (I hope that these compilers get better so that this code can die.)
  285. *
  286. * These overloads will be preferred over the general template code above.
  287. */
  288. #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__)
  289. inline uint8_t rotr(uint8_t value, bitcount_t rot)
  290. {
  291. asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
  292. return value;
  293. }
  294. inline uint16_t rotr(uint16_t value, bitcount_t rot)
  295. {
  296. asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
  297. return value;
  298. }
  299. inline uint32_t rotr(uint32_t value, bitcount_t rot)
  300. {
  301. asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
  302. return value;
  303. }
  304. #if __x86_64__
  305. inline uint64_t rotr(uint64_t value, bitcount_t rot)
  306. {
  307. asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
  308. return value;
  309. }
  310. #endif // __x86_64__
  311. #endif // PCG_USE_INLINE_ASM
  312. /*
  313. * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
  314. * 32-bit integers with seed data, but sometimes we want to produce
  315. * larger or smaller integers.
  316. *
  317. * The following code handles this annoyance.
  318. *
  319. * uneven_copy will copy an array of 32-bit ints to an array of larger or
  320. * smaller ints (actually, the code is general it only needing forward
  321. * iterators). The copy is identical to the one that would be performed if
  322. * we just did memcpy on a standard little-endian machine, but works
  323. * regardless of the endian of the machine (or the weirdness of the ints
  324. * involved).
  325. *
  326. * generate_to initializes an array of integers using a SeedSeq
  327. * object. It is given the size as a static constant at compile time and
  328. * tries to avoid memory allocation. If we're filling in 32-bit constants
  329. * we just do it directly. If we need a separate buffer and it's small,
  330. * we allocate it on the stack. Otherwise, we fall back to heap allocation.
  331. * Ugh.
  332. *
  333. * generate_one produces a single value of some integral type using a
  334. * SeedSeq object.
  335. */
  336. /* uneven_copy helper, case where destination ints are less than 32 bit. */
  337. template<class SrcIter, class DestIter>
  338. SrcIter uneven_copy_impl(
  339. SrcIter src_first, DestIter dest_first, DestIter dest_last,
  340. std::true_type)
  341. {
  342. typedef typename std::iterator_traits<SrcIter>::value_type src_t;
  343. typedef typename std::iterator_traits<DestIter>::value_type dest_t;
  344. constexpr bitcount_t SRC_SIZE = sizeof(src_t);
  345. constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
  346. constexpr bitcount_t DEST_BITS = DEST_SIZE * 8;
  347. constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE;
  348. size_t count = 0;
  349. src_t value;
  350. while (dest_first != dest_last) {
  351. if ((count++ % SCALE) == 0)
  352. value = *src_first++; // Get more bits
  353. else
  354. value >>= DEST_BITS; // Move down bits
  355. *dest_first++ = dest_t(value); // Truncates, ignores high bits.
  356. }
  357. return src_first;
  358. }
  359. /* uneven_copy helper, case where destination ints are more than 32 bit. */
  360. template<class SrcIter, class DestIter>
  361. SrcIter uneven_copy_impl(
  362. SrcIter src_first, DestIter dest_first, DestIter dest_last,
  363. std::false_type)
  364. {
  365. typedef typename std::iterator_traits<SrcIter>::value_type src_t;
  366. typedef typename std::iterator_traits<DestIter>::value_type dest_t;
  367. constexpr auto SRC_SIZE = sizeof(src_t);
  368. constexpr auto SRC_BITS = SRC_SIZE * 8;
  369. constexpr auto DEST_SIZE = sizeof(dest_t);
  370. constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE;
  371. while (dest_first != dest_last) {
  372. dest_t value(0UL);
  373. unsigned int shift = 0;
  374. for (size_t i = 0; i < SCALE; ++i) {
  375. value |= dest_t(*src_first++) << shift;
  376. shift += SRC_BITS;
  377. }
  378. *dest_first++ = value;
  379. }
  380. return src_first;
  381. }
  382. /* uneven_copy, call the right code for larger vs. smaller */
  383. template<class SrcIter, class DestIter>
  384. inline SrcIter uneven_copy(SrcIter src_first,
  385. DestIter dest_first, DestIter dest_last)
  386. {
  387. typedef typename std::iterator_traits<SrcIter>::value_type src_t;
  388. typedef typename std::iterator_traits<DestIter>::value_type dest_t;
  389. constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
  390. return uneven_copy_impl(src_first, dest_first, dest_last,
  391. std::integral_constant<bool, DEST_IS_SMALLER>{});
  392. }
  393. /* generate_to, fill in a fixed-size array of integral type using a SeedSeq
  394. * (actually works for any random-access iterator)
  395. */
  396. template <size_t size, typename SeedSeq, typename DestIter>
  397. inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
  398. std::true_type)
  399. {
  400. generator.generate(dest, dest+size);
  401. }
  402. template <size_t size, typename SeedSeq, typename DestIter>
  403. void generate_to_impl(SeedSeq&& generator, DestIter dest,
  404. std::false_type)
  405. {
  406. typedef typename std::iterator_traits<DestIter>::value_type dest_t;
  407. constexpr auto DEST_SIZE = sizeof(dest_t);
  408. constexpr auto GEN_SIZE = sizeof(uint32_t);
  409. constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
  410. constexpr size_t FROM_ELEMS =
  411. GEN_IS_SMALLER
  412. ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
  413. : (size + (GEN_SIZE / DEST_SIZE) - 1)
  414. / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
  415. // this odd code ^^^^^^^^^^^^^^^^^ is work-around for
  416. // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
  417. if (FROM_ELEMS <= 1024) {
  418. uint32_t buffer[FROM_ELEMS];
  419. generator.generate(buffer, buffer+FROM_ELEMS);
  420. uneven_copy(buffer, dest, dest+size);
  421. } else {
  422. uint32_t* buffer = (uint32_t*) malloc(GEN_SIZE * FROM_ELEMS);
  423. generator.generate(buffer, buffer+FROM_ELEMS);
  424. uneven_copy(buffer, dest, dest+size);
  425. free(buffer);
  426. }
  427. }
  428. template <size_t size, typename SeedSeq, typename DestIter>
  429. inline void generate_to(SeedSeq&& generator, DestIter dest)
  430. {
  431. typedef typename std::iterator_traits<DestIter>::value_type dest_t;
  432. constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t);
  433. generate_to_impl<size>(std::forward<SeedSeq>(generator), dest,
  434. std::integral_constant<bool, IS_32BIT>{});
  435. }
  436. /* generate_one, produce a value of integral type using a SeedSeq
  437. * (optionally, we can have it produce more than one and pick which one
  438. * we want)
  439. */
  440. template <typename UInt, size_t i = 0UL, size_t N = i+1UL, typename SeedSeq>
  441. inline UInt generate_one(SeedSeq&& generator)
  442. {
  443. UInt result[N];
  444. generate_to<N>(std::forward<SeedSeq>(generator), result);
  445. return result[i];
  446. }
  447. template <typename RngType>
  448. auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
  449. -> typename RngType::result_type
  450. {
  451. typedef typename RngType::result_type rtype;
  452. rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
  453. % upper_bound;
  454. for (;;) {
  455. rtype r = rng() - RngType::min();
  456. if (r >= threshold)
  457. return r % upper_bound;
  458. }
  459. }
  460. template <typename Iter, typename RandType>
  461. void shuffle(Iter from, Iter to, RandType&& rng)
  462. {
  463. typedef typename std::iterator_traits<Iter>::difference_type delta_t;
  464. auto count = to - from;
  465. while (count > 1) {
  466. delta_t chosen(bounded_rand(rng, count));
  467. --count;
  468. --to;
  469. using std::swap;
  470. swap(*(from+chosen), *to);
  471. }
  472. }
  473. /*
  474. * Although std::seed_seq is useful, it isn't everything. Often we want to
  475. * initialize a random-number generator some other way, such as from a random
  476. * device.
  477. *
  478. * Technically, it does not meet the requirements of a SeedSequence because
  479. * it lacks some of the rarely-used member functions (some of which would
  480. * be impossible to provide). However the C++ standard is quite specific
  481. * that actual engines only called the generate method, so it ought not to be
  482. * a problem in practice.
  483. */
  484. template <typename RngType>
  485. class seed_seq_from {
  486. private:
  487. RngType rng_;
  488. typedef uint_least32_t result_type;
  489. public:
  490. template<typename... Args>
  491. seed_seq_from(Args&&... args) :
  492. rng_(std::forward<Args>(args)...)
  493. {
  494. // Nothing (else) to do...
  495. }
  496. template<typename Iter>
  497. void generate(Iter start, Iter finish)
  498. {
  499. for (auto i = start; i != finish; ++i)
  500. *i = result_type(rng_());
  501. }
  502. constexpr size_t size() const
  503. {
  504. return (sizeof(typename RngType::result_type) > sizeof(result_type)
  505. && RngType::max() > ~size_t(0UL))
  506. ? ~size_t(0UL)
  507. : size_t(RngType::max());
  508. }
  509. };
  510. /*
  511. * Sometimes you might want a distinct seed based on when the program
  512. * was compiled. That way, a particular instance of the program will
  513. * behave the same way, but when recompiled it'll produce a different
  514. * value.
  515. */
  516. template <typename IntType>
  517. struct static_arbitrary_seed {
  518. private:
  519. static constexpr IntType fnv(IntType hash, const char* pos) {
  520. return *pos == '\0'
  521. ? hash
  522. : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1));
  523. }
  524. public:
  525. static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)),
  526. __DATE__ __TIME__ __FILE__);
  527. };
  528. // Sometimes, when debugging or testing, it's handy to be able print the name
  529. // of a (in human-readable form). This code allows the idiom:
  530. //
  531. // cout << printable_typename<my_foo_type_t>()
  532. //
  533. // to print out my_foo_type_t (or its concrete type if it is a synonym)
  534. template <typename T>
  535. struct printable_typename {};
  536. template <typename T>
  537. std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
  538. const char *implementation_typename = typeid(T).name();
  539. #ifdef __GNUC__
  540. int status;
  541. const char* pretty_name =
  542. abi::__cxa_demangle(implementation_typename, NULL, NULL, &status);
  543. if (status == 0)
  544. out << pretty_name;
  545. free((void*) pretty_name);
  546. if (status == 0)
  547. return out;
  548. #endif
  549. out << implementation_typename;
  550. return out;
  551. }
  552. } // namespace pcg_extras
  553. #endif // PCG_EXTRAS_HPP_INCLUDED