/* * PCG Random Number Generation for C++ * * Copyright 2014 Melissa O'Neill * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * For additional information about the PCG random number generation scheme, * including its license and other licensing options, visit * * http://www.pcg-random.org */ /* * This file provides support code that is useful for random-number generation * but not specific to the PCG generation scheme, including: * - 128-bit int support for platforms where it isn't available natively * - bit twiddling operations * - I/O of 128-bit and 8-bit integers * - Handling the evilness of SeedSeq * - Support for efficiently producing random numbers less than a given * bound */ #ifndef PCG_EXTRAS_HPP_INCLUDED #define PCG_EXTRAS_HPP_INCLUDED 1 #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __GNUC__ #include #endif /* * Abstractions for compiler-specific directives */ #ifdef __GNUC__ #define PCG_NOINLINE __attribute__((noinline)) #else #define PCG_NOINLINE #endif /* * Some members of the PCG library use 128-bit math. When compiling on 64-bit * platforms, both GCC and Clang provide 128-bit integer types that are ideal * for the job. * * On 32-bit platforms (or with other compilers), we fall back to a C++ * class that provides 128-bit unsigned integers instead. It may seem * like we're reinventing the wheel here, because libraries already exist * that support large integers, but most existing libraries provide a very * generic multiprecision code, but here we're operating at a fixed size. * Also, most other libraries are fairly heavyweight. So we use a direct * implementation. Sadly, it's much slower than hand-coded assembly or * direct CPU support. * */ #if __SIZEOF_INT128__ namespace pcg_extras { typedef __uint128_t pcg128_t; } #define PCG_128BIT_CONSTANT(high,low) \ ((pcg128_t(high) << 64) + low) #else #include "pcg_uint128.hpp" namespace pcg_extras { typedef pcg_extras::uint_x4 pcg128_t; } #define PCG_128BIT_CONSTANT(high,low) \ pcg128_t(high,low) #define PCG_EMULATED_128BIT_MATH 1 #endif namespace pcg_extras { /* * We often need to represent a "number of bits". When used normally, these * numbers are never greater than 128, so an unsigned char is plenty. * If you're using a nonstandard generator of a larger size, you can set * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers * might produce faster code if you set it to an unsigned int.) */ #ifndef PCG_BITCOUNT_T typedef uint8_t bitcount_t; #else typedef PCG_BITCOUNT_T bitcount_t; #endif /* * C++ requires us to be able to serialize RNG state by printing or reading * it from a stream. Because we use 128-bit ints, we also need to be able * ot print them, so here is code to do so. * * This code provides enough functionality to print 128-bit ints in decimal * and zero-padded in hex. It's not a full-featured implementation. */ template std::basic_ostream& operator<<(std::basic_ostream& out, pcg128_t value) { auto desired_base = out.flags() & out.basefield; bool want_hex = desired_base == out.hex; if (want_hex) { uint64_t highpart = uint64_t(value >> 64); uint64_t lowpart = uint64_t(value); auto desired_width = out.width(); if (desired_width > 16) { out.width(desired_width - 16); } if (highpart != 0 || desired_width > 16) out << highpart; CharT oldfill; if (highpart != 0) { out.width(16); oldfill = out.fill('0'); } auto oldflags = out.setf(decltype(desired_base){}, out.showbase); out << lowpart; out.setf(oldflags); if (highpart != 0) { out.fill(oldfill); } return out; } constexpr size_t MAX_CHARS_128BIT = 40; char buffer[MAX_CHARS_128BIT]; char* pos = buffer+sizeof(buffer); *(--pos) = '\0'; constexpr auto BASE = pcg128_t(10ULL); do { auto div = value / BASE; auto mod = uint32_t(value - (div * BASE)); *(--pos) = '0' + mod; value = div; } while(value != pcg128_t(0ULL)); return out << pos; } template std::basic_istream& operator>>(std::basic_istream& in, pcg128_t& value) { typename std::basic_istream::sentry s(in); if (!s) return in; constexpr auto BASE = pcg128_t(10ULL); pcg128_t current(0ULL); bool did_nothing = true; bool overflow = false; for(;;) { CharT wide_ch = in.get(); if (!in.good()) break; auto ch = in.narrow(wide_ch, '\0'); if (ch < '0' || ch > '9') { in.unget(); break; } did_nothing = false; pcg128_t digit(uint32_t(ch - '0')); pcg128_t timesbase = current*BASE; overflow = overflow || timesbase < current; current = timesbase + digit; overflow = overflow || current < digit; } if (did_nothing || overflow) { in.setstate(std::ios::failbit); if (overflow) current = ~pcg128_t(0ULL); } value = current; return in; } /* * Likewise, if people use tiny rngs, we'll be serializing uint8_t. * If we just used the provided IO operators, they'd read/write chars, * not ints, so we need to define our own. We *can* redefine this operator * here because we're in our own namespace. */ template std::basic_ostream& operator<<(std::basic_ostream&out, uint8_t value) { return out << uint32_t(value); } template std::basic_istream& operator>>(std::basic_istream& in, uint8_t target) { uint32_t value = 0xdecea5edU; in >> value; if (!in && value == 0xdecea5edU) return in; if (value > uint8_t(~0)) { in.setstate(std::ios::failbit); value = ~0U; } target = uint8_t(value); return in; } /* Unfortunately, the above functions don't get found in preference to the * built in ones, so we create some more specific overloads that will. * Ugh. */ inline std::ostream& operator<<(std::ostream& out, uint8_t value) { return pcg_extras::operator<< (out, value); } inline std::istream& operator>>(std::istream& in, uint8_t& value) { return pcg_extras::operator>> (in, value); } /* * Useful bitwise operations. */ /* * XorShifts are invertable, but they are someting of a pain to invert. * This function backs them out. It's used by the whacky "inside out" * generator defined later. */ template inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) { if (2*shift >= bits) { return x ^ (x >> shift); } itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1; itype highmask1 = ~lowmask1; itype top1 = x; itype bottom1 = x & lowmask1; top1 ^= top1 >> shift; top1 &= highmask1; x = top1 | bottom1; itype lowmask2 = (itype(1U) << (bits - shift)) - 1; itype bottom2 = x & lowmask2; bottom2 = unxorshift(bottom2, bits - shift, shift); bottom2 &= lowmask1; return top1 | bottom2; } /* * Rotate left and right. * * In ideal world, compilers would spot idiomatic rotate code and convert it * to a rotate instruction. Of course, opinions vary on what the correct * idiom is and how to spot it. For clang, sometimes it generates better * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM. */ template inline itype rotl(itype value, bitcount_t rot) { constexpr bitcount_t bits = sizeof(itype) * 8; constexpr bitcount_t mask = bits - 1; #if PCG_USE_ZEROCHECK_ROTATE_IDIOM return rot ? (value << rot) | (value >> (bits - rot)) : value; #else return (value << rot) | (value >> ((- rot) & mask)); #endif } template inline itype rotr(itype value, bitcount_t rot) { constexpr bitcount_t bits = sizeof(itype) * 8; constexpr bitcount_t mask = bits - 1; #if PCG_USE_ZEROCHECK_ROTATE_IDIOM return rot ? (value >> rot) | (value << (bits - rot)) : value; #else return (value >> rot) | (value << ((- rot) & mask)); #endif } /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes * to properly recognizing idiomatic rotate code, so for we also provide * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. * (I hope that these compilers get better so that this code can die.) * * These overloads will be preferred over the general template code above. */ #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) inline uint8_t rotr(uint8_t value, bitcount_t rot) { asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); return value; } inline uint16_t rotr(uint16_t value, bitcount_t rot) { asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); return value; } inline uint32_t rotr(uint32_t value, bitcount_t rot) { asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); return value; } #if __x86_64__ inline uint64_t rotr(uint64_t value, bitcount_t rot) { asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); return value; } #endif // __x86_64__ #endif // PCG_USE_INLINE_ASM /* * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of * 32-bit integers with seed data, but sometimes we want to produce * larger or smaller integers. * * The following code handles this annoyance. * * uneven_copy will copy an array of 32-bit ints to an array of larger or * smaller ints (actually, the code is general it only needing forward * iterators). The copy is identical to the one that would be performed if * we just did memcpy on a standard little-endian machine, but works * regardless of the endian of the machine (or the weirdness of the ints * involved). * * generate_to initializes an array of integers using a SeedSeq * object. It is given the size as a static constant at compile time and * tries to avoid memory allocation. If we're filling in 32-bit constants * we just do it directly. If we need a separate buffer and it's small, * we allocate it on the stack. Otherwise, we fall back to heap allocation. * Ugh. * * generate_one produces a single value of some integral type using a * SeedSeq object. */ /* uneven_copy helper, case where destination ints are less than 32 bit. */ template SrcIter uneven_copy_impl( SrcIter src_first, DestIter dest_first, DestIter dest_last, std::true_type) { typedef typename std::iterator_traits::value_type src_t; typedef typename std::iterator_traits::value_type dest_t; constexpr bitcount_t SRC_SIZE = sizeof(src_t); constexpr bitcount_t DEST_SIZE = sizeof(dest_t); constexpr bitcount_t DEST_BITS = DEST_SIZE * 8; constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE; size_t count = 0; src_t value; while (dest_first != dest_last) { if ((count++ % SCALE) == 0) value = *src_first++; // Get more bits else value >>= DEST_BITS; // Move down bits *dest_first++ = dest_t(value); // Truncates, ignores high bits. } return src_first; } /* uneven_copy helper, case where destination ints are more than 32 bit. */ template SrcIter uneven_copy_impl( SrcIter src_first, DestIter dest_first, DestIter dest_last, std::false_type) { typedef typename std::iterator_traits::value_type src_t; typedef typename std::iterator_traits::value_type dest_t; constexpr auto SRC_SIZE = sizeof(src_t); constexpr auto SRC_BITS = SRC_SIZE * 8; constexpr auto DEST_SIZE = sizeof(dest_t); constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE; while (dest_first != dest_last) { dest_t value(0UL); unsigned int shift = 0; for (size_t i = 0; i < SCALE; ++i) { value |= dest_t(*src_first++) << shift; shift += SRC_BITS; } *dest_first++ = value; } return src_first; } /* uneven_copy, call the right code for larger vs. smaller */ template inline SrcIter uneven_copy(SrcIter src_first, DestIter dest_first, DestIter dest_last) { typedef typename std::iterator_traits::value_type src_t; typedef typename std::iterator_traits::value_type dest_t; constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t); return uneven_copy_impl(src_first, dest_first, dest_last, std::integral_constant{}); } /* generate_to, fill in a fixed-size array of integral type using a SeedSeq * (actually works for any random-access iterator) */ template inline void generate_to_impl(SeedSeq&& generator, DestIter dest, std::true_type) { generator.generate(dest, dest+size); } template void generate_to_impl(SeedSeq&& generator, DestIter dest, std::false_type) { typedef typename std::iterator_traits::value_type dest_t; constexpr auto DEST_SIZE = sizeof(dest_t); constexpr auto GEN_SIZE = sizeof(uint32_t); constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; constexpr size_t FROM_ELEMS = GEN_IS_SMALLER ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE) : (size + (GEN_SIZE / DEST_SIZE) - 1) / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); // this odd code ^^^^^^^^^^^^^^^^^ is work-around for // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 if (FROM_ELEMS <= 1024) { uint32_t buffer[FROM_ELEMS]; generator.generate(buffer, buffer+FROM_ELEMS); uneven_copy(buffer, dest, dest+size); } else { uint32_t* buffer = (uint32_t*) malloc(GEN_SIZE * FROM_ELEMS); generator.generate(buffer, buffer+FROM_ELEMS); uneven_copy(buffer, dest, dest+size); free(buffer); } } template inline void generate_to(SeedSeq&& generator, DestIter dest) { typedef typename std::iterator_traits::value_type dest_t; constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); generate_to_impl(std::forward(generator), dest, std::integral_constant{}); } /* generate_one, produce a value of integral type using a SeedSeq * (optionally, we can have it produce more than one and pick which one * we want) */ template inline UInt generate_one(SeedSeq&& generator) { UInt result[N]; generate_to(std::forward(generator), result); return result[i]; } template auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) -> typename RngType::result_type { typedef typename RngType::result_type rtype; rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) % upper_bound; for (;;) { rtype r = rng() - RngType::min(); if (r >= threshold) return r % upper_bound; } } template void shuffle(Iter from, Iter to, RandType&& rng) { typedef typename std::iterator_traits::difference_type delta_t; auto count = to - from; while (count > 1) { delta_t chosen(bounded_rand(rng, count)); --count; --to; using std::swap; swap(*(from+chosen), *to); } } /* * Although std::seed_seq is useful, it isn't everything. Often we want to * initialize a random-number generator some other way, such as from a random * device. * * Technically, it does not meet the requirements of a SeedSequence because * it lacks some of the rarely-used member functions (some of which would * be impossible to provide). However the C++ standard is quite specific * that actual engines only called the generate method, so it ought not to be * a problem in practice. */ template class seed_seq_from { private: RngType rng_; typedef uint_least32_t result_type; public: template seed_seq_from(Args&&... args) : rng_(std::forward(args)...) { // Nothing (else) to do... } template void generate(Iter start, Iter finish) { for (auto i = start; i != finish; ++i) *i = result_type(rng_()); } constexpr size_t size() const { return (sizeof(typename RngType::result_type) > sizeof(result_type) && RngType::max() > ~size_t(0UL)) ? ~size_t(0UL) : size_t(RngType::max()); } }; /* * Sometimes you might want a distinct seed based on when the program * was compiled. That way, a particular instance of the program will * behave the same way, but when recompiled it'll produce a different * value. */ template struct static_arbitrary_seed { private: static constexpr IntType fnv(IntType hash, const char* pos) { return *pos == '\0' ? hash : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1)); } public: static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)), __DATE__ __TIME__ __FILE__); }; // Sometimes, when debugging or testing, it's handy to be able print the name // of a (in human-readable form). This code allows the idiom: // // cout << printable_typename() // // to print out my_foo_type_t (or its concrete type if it is a synonym) template struct printable_typename {}; template std::ostream& operator<<(std::ostream& out, printable_typename) { const char *implementation_typename = typeid(T).name(); #ifdef __GNUC__ int status; const char* pretty_name = abi::__cxa_demangle(implementation_typename, NULL, NULL, &status); if (status == 0) out << pretty_name; free((void*) pretty_name); if (status == 0) return out; #endif out << implementation_typename; return out; } } // namespace pcg_extras #endif // PCG_EXTRAS_HPP_INCLUDED