""" Utilities for casting numpy values in various ways

Most routines work round some numpy oddities in floating point precision and
casting.  Others work round numpy casting to and from python ints
"""

from platform import processor, machine

import numpy as np


class CastingError(Exception):
    """ Raised when a float array cannot be cast to int as requested """
    pass


def float_to_int(arr, int_type, nan2zero=True, infmax=False):
    """ Convert floating point array `arr` to type `int_type`

    * Rounds numbers to nearest integer
    * Clips values to prevent overflows when casting
    * Converts NaN to 0 (for `nan2zero` == True)

    Casting floats to integers is delicate because the result is undefined
    and platform specific for float values outside the range of `int_type`.
    Define ``shared_min`` to be the minimum value that can be exactly
    represented in both the float type of `arr` and `int_type`. Define
    ``shared_max`` to be the equivalent maximum value.  To avoid undefined
    results we threshold `arr` at ``shared_min`` and ``shared_max``.

    Parameters
    ----------
    arr : array-like
        Array of floating point type
    int_type : object
        Numpy integer type
    nan2zero : {True, False, None}
        Whether to convert NaN value to zero.  Default is True.  If False,
        and NaNs are present, raise CastingError. If None, do not check for
        NaN values and pass through directly to the ``astype`` casting
        mechanism.  In this last case, the resulting value is undefined.
    infmax : {False, True}
        If True, set np.inf values in `arr` to be `int_type` integer maximum
        value, -np.inf as `int_type` integer minimum.  If False, set +/- infs
        to be ``shared_min``, ``shared_max`` as defined above.  Therefore
        False gives faster conversion at the expense of infs that are further
        from infinity.

    Returns
    -------
    iarr : ndarray
        of type `int_type`

    Raises
    ------
    CastingError
        If `nan2zero` is False and `arr` contains NaN values.

    Examples
    --------
    >>> float_to_int([np.nan, np.inf, -np.inf, 1.1, 6.6], np.int16)
    array([     0,  32767, -32768,      1,      7], dtype=int16)

    Notes
    -----
    Numpy relies on the C library to cast from float to int using the
    standard ``astype`` method of the array.

    Quoting from section F4 of the C99 standard:

        If the floating value is infinite or NaN or if the integral part of
        the floating value exceeds the range of the integer type, then the
        "invalid" floating-point exception is raised and the resulting value
        is unspecified.

    Hence we threshold at ``shared_min`` and ``shared_max`` to avoid casting
    to values that are undefined.

    See: http://en.wikipedia.org/wiki/C99 . There are links to the C99
    standard from that page.
    """
    arr = np.asarray(arr)
    flt_type = arr.dtype.type
    int_type = np.dtype(int_type).type
    # Deal with scalar as input; fancy indexing needs 1D
    shape = arr.shape
    arr = np.atleast_1d(arr)
    mn, mx = shared_range(flt_type, int_type)
    if nan2zero is None:
        seen_nans = False
    else:
        nans = np.isnan(arr)
        seen_nans = np.any(nans)
        if not nan2zero and seen_nans:
            raise CastingError('NaNs in array, nan2zero is False')
    iarr = np.clip(np.rint(arr), mn, mx).astype(int_type)
    if seen_nans:
        # The cast result for NaN is unspecified; overwrite it
        iarr[nans] = 0
    if not infmax:
        return iarr.reshape(shape)
    ii = np.iinfo(int_type)
    iarr[arr == np.inf] = ii.max
    # -inf was already clipped to mn; only patch if mn is not the int minimum
    if ii.min != int(mn):
        iarr[arr == -np.inf] = ii.min
    return iarr.reshape(shape)


# Cache range values
_SHARED_RANGES = {}


def shared_range(flt_type, int_type):
    """ Min and max in float type that are >=min, <=max in integer type

    This is not as easy as it sounds, because the float type may not be able
    to exactly represent the max or min integer values, so we have to find
    the next exactly representable floating point value to do the
    thresholding.

    Parameters
    ----------
    flt_type : dtype specifier
        A dtype specifier referring to a numpy floating point type.  For
        example, ``f4``, ``np.dtype('f4')``, ``np.float32`` are equivalent.
    int_type : dtype specifier
        A dtype specifier referring to a numpy integer type.  For example,
        ``i4``, ``np.dtype('i4')``, ``np.int32`` are equivalent

    Returns
    -------
    mn : object
        Number of type `flt_type` that is the minimum value in the range of
        `int_type`, such that ``mn.astype(int_type)`` >= min of `int_type`
    mx : object
        Number of type `flt_type` that is the maximum value in the range of
        `int_type`, such that ``mx.astype(int_type)`` <= max of `int_type`

    Examples
    --------
    >>> shared_range(np.float32, np.int32) == (-2147483648.0, 2147483520.0)
    True
    >>> shared_range('f4', 'i4') == (-2147483648.0, 2147483520.0)
    True
    """
    flt_type = np.dtype(flt_type).type
    int_type = np.dtype(int_type).type
    key = (flt_type, int_type)
    # Used cached value if present
    try:
        return _SHARED_RANGES[key]
    except KeyError:
        pass
    ii = np.iinfo(int_type)
    fi = np.finfo(flt_type)
    # If the integer limit is beyond the float range, the exact-integer
    # routines return +/- inf; fall back to the float type's own limits
    mn = ceil_exact(ii.min, flt_type)
    if mn == -np.inf:
        mn = fi.min
    mx = floor_exact(ii.max, flt_type)
    if mx == np.inf:
        mx = fi.max
    _SHARED_RANGES[key] = (mn, mx)
    return mn, mx


# ----------------------------------------------------------------------------
# Routines to work out the next lowest representable integer in floating point
# types.
# ----------------------------------------------------------------------------

# float16 not present in np < 1.6
_float16 = getattr(np, 'float16', None)

# Sized scalar type names used to rebuild the ``np.sctypes`` lists when numpy
# no longer provides them (``np.sctypes`` was removed in numpy 2.0).
_SCTYPE_NAMES = {
    'int': ('int8', 'int16', 'int32', 'int64'),
    'uint': ('uint8', 'uint16', 'uint32', 'uint64'),
    'float': ('float16', 'float32', 'float64', 'float96', 'float128'),
    'complex': ('complex64', 'complex128', 'complex192', 'complex256'),
}


def _sctypes(kind):
    """ Return list of numpy scalar types of `kind`, smallest first

    Uses ``np.sctypes`` when numpy provides it, otherwise collects whichever
    of the sized type names in ``_SCTYPE_NAMES`` exist in this numpy build.

    Parameters
    ----------
    kind : {'int', 'uint', 'float', 'complex'}

    Returns
    -------
    types : list
        numpy scalar types
    """
    legacy = getattr(np, 'sctypes', None)
    if legacy is not None:
        return list(legacy[kind])
    return [getattr(np, name)
            for name in _SCTYPE_NAMES[kind]
            if hasattr(np, name)]


class FloatingError(Exception):
    """ Raised for unrecognized float types or non-integer float values """
    pass


def on_powerpc():
    """ True if we are running on a Power PC platform

    Has to deal with older Macs and IBM POWER7 series among others
    """
    return processor() == 'powerpc' or machine().startswith('ppc')


def type_info(np_type):
    """ Return dict with min, max, nexp, nmant, width for numpy type `np_type`

    Type can be integer in which case nexp and nmant are None.

    Parameters
    ----------
    np_type : numpy type specifier
        Any specifier for a numpy dtype

    Returns
    -------
    info : dict
        with fields ``min`` (minimum value), ``max`` (maximum value), ``nexp``
        (exponent width), ``nmant`` (significand precision not including
        implicit first digit), ``minexp`` (minimum exponent), ``maxexp``
        (maximum exponent), ``width`` (width in bytes). (``nexp``, ``nmant``,
        ``minexp``, ``maxexp``) are None for integer types. Both ``min`` and
        ``max`` are of type `np_type`.

    Raises
    ------
    FloatingError
        for floating point types we don't recognize

    Notes
    -----
    You might be thinking that ``np.finfo`` does this job, and it does, except
    for PPC long doubles (http://projects.scipy.org/numpy/ticket/2077) and
    float96 on Windows compiled with Mingw. This routine protects against such
    errors in ``np.finfo`` by only accepting values that we know are likely to
    be correct.
    """
    dt = np.dtype(np_type)
    np_type = dt.type
    width = dt.itemsize
    try:  # integer type
        info = np.iinfo(dt)
    except ValueError:
        pass
    else:
        return dict(min=np_type(info.min), max=np_type(info.max),
                    minexp=None, maxexp=None,
                    nmant=None, nexp=None, width=width)
    info = np.finfo(dt)
    # Trust the standard IEEE types
    nmant, nexp = info.nmant, info.nexp
    ret = dict(min=np_type(info.min),
               max=np_type(info.max),
               nmant=nmant,
               nexp=nexp,
               minexp=info.minexp,
               maxexp=info.maxexp,
               width=width)
    if np_type in (_float16, np.float32, np.float64,
                   np.complex64, np.complex128):
        return ret
    info_64 = np.finfo(np.float64)
    # np.clongdouble is the long double complex type; the old alias
    # np.longcomplex no longer exists in numpy >= 2.0
    if dt.kind == 'c':
        assert np_type is np.clongdouble
        vals = (nmant, nexp, width // 2)
    else:
        assert np_type is np.longdouble
        vals = (nmant, nexp, width)
    if vals in ((112, 15, 16),  # binary128
                (info_64.nmant, info_64.nexp, 8),  # float64
                (63, 15, 12), (63, 15, 16)):  # Intel extended 80
        return ret  # these are OK without modification
    # The remaining types are longdoubles with bad finfo values.  Some we
    # correct, others we wait to hear of errors.
    # We start with float64 as basis
    ret = type_info(np.float64)
    if vals in ((52, 15, 12),  # windows float96
                (52, 15, 16)):  # windows float128?
        # On windows 32 bit at least, float96 is Intel 80 storage but
        # operating at float64 precision. The finfo values give nexp == 15 (as
        # for intel 80) but in calculations nexp in fact appears to be 11 as
        # for float64
        ret.update(dict(width=width))
        return ret
    # Oh dear, we don't recognize the type information.  Try some known types
    # and then give up. At this stage we're expecting exotic longdouble or
    # their complex equivalent.
    if (np_type not in (np.longdouble, np.clongdouble)
            or width not in (16, 32)):
        raise FloatingError('We had not expected type %s' % np_type)
    if (vals == (1, 1, 16) and on_powerpc() and
            _check_maxexp(np.longdouble, 1024)):
        # double pair on PPC.  The _check_nmant routine does not work for this
        # type, hence the powerpc platform check instead
        ret.update(dict(nmant=106, width=width))
    elif (_check_nmant(np.longdouble, 52) and
          _check_maxexp(np.longdouble, 1024)):
        # Got float64 despite everything.  Note the maxexp for float64 is
        # 1024; testing against the exponent *width* (11) can never succeed
        # because 2**11 is finite in every float type considered here.
        pass
    elif (_check_nmant(np.longdouble, 112) and
          _check_maxexp(np.longdouble, 16384)):
        # binary 128, but with some busted type information. The complex
        # longdouble type seems to break here too, so we need to use
        # np.longdouble and complexify
        two = np.longdouble(2)
        # See: http://matthew-brett.github.com/pydagogue/floating_point.html
        max_val = (two ** 113 - 1) / (two ** 112) * two ** 16383
        if np_type is np.clongdouble:
            max_val += 0j
        ret = dict(min=-max_val,
                   max=max_val,
                   nmant=112,
                   nexp=15,
                   minexp=-16382,
                   maxexp=16384,
                   width=width)
    else:  # don't recognize the type
        raise FloatingError('We had not expected long double type %s '
                            'with info %s' % (np_type, info))
    return ret


def _check_nmant(np_type, nmant):
    """ True if fp type `np_type` seems to have `nmant` significand digits

    Note 'digits' does not include implicit digits.  And in fact if there are
    no implicit digits, the `nmant` number is one less than the actual digits.
    Assumes base 2 representation.

    Parameters
    ----------
    np_type : numpy type specifier
        Any specifier for a numpy dtype
    nmant : int
        Number of digits to test against

    Returns
    -------
    tf : bool
        True if `nmant` is the correct number of significand digits, false
        otherwise
    """
    np_type = np.dtype(np_type).type
    max_contig = np_type(2 ** (nmant + 1))  # maximum of contiguous integers
    tests = max_contig + np.array([-2, -1, 0, 1, 2], dtype=np_type)
    # Just past the contiguous range, +1 cannot be represented and rounds
    # back to max_contig, whereas +2 can
    return bool(np.all(tests - max_contig == [-2, -1, 0, 0, 2]))


def _check_maxexp(np_type, maxexp):
    """ True if fp type `np_type` seems to have `maxexp` maximum exponent

    We're testing "maxexp" as returned by numpy. This value is set to one
    greater than the maximum power of 2 that `np_type` can represent.

    Assumes base 2 representation.  Very crude check

    Parameters
    ----------
    np_type : numpy type specifier
        Any specifier for a numpy dtype
    maxexp : int
        Maximum exponent to test against

    Returns
    -------
    tf : bool
        True if `maxexp` is the correct maximum exponent, False otherwise.
    """
    dt = np.dtype(np_type)
    np_type = dt.type
    two = np_type(2).reshape((1,))  # to avoid upcasting
    # Overflow to inf is exactly what we are probing for, so silence it
    with np.errstate(over='ignore'):
        below = np.isfinite(two ** (maxexp - 1))[0]
        at = np.isfinite(two ** maxexp)[0]
    return bool(below and not at)


def as_int(x, check=True):
    """ Return python integer representation of number

    This is useful because the numpy int(val) mechanism is broken for large
    values in np.longdouble.

    It is also useful to work around a numpy 1.4.1 bug in conversion of uints
    to python ints.

    This routine will still raise an OverflowError for values that are outside
    the range of float64.

    Parameters
    ----------
    x : object
        integer, unsigned integer or floating point value
    check : {True, False}
        If True, raise error for values that are not integers

    Returns
    -------
    i : int
        Python integer

    Raises
    ------
    FloatingError
        If `check` is True and `x` is not an integer value.

    Examples
    --------
    >>> as_int(2.0)
    2
    >>> as_int(-2.0)
    -2
    >>> as_int(2.1) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
        ...
    FloatingError: Not an integer: 2.1
    >>> as_int(2.1, check=False)
    2
    """
    x = np.array(x)
    if x.dtype.kind in 'iu':
        # This works around a nasty numpy 1.4.1 bug such that:
        # >>> int(np.uint32(2**32-1)
        # -1
        return int(str(x))
    ix = int(x)
    if ix == x:
        return ix
    fx = np.floor(x)
    if check and fx != x:
        raise FloatingError('Not an integer: %s' % x)
    if not fx.dtype.type == np.longdouble:
        return int(x)
    # Subtract float64 chunks until we have all of the number. If the int is
    # too large, it will overflow
    ret = 0
    while fx != 0:
        f64 = np.float64(fx)
        fx -= f64
        ret += int(f64)
    return ret


def int_to_float(val, flt_type):
    """ Convert integer `val` to floating point type `flt_type`

    Why is this so complicated?

    At least in numpy <= 1.6.1, numpy longdoubles do not correctly convert to
    ints, and ints do not correctly convert to longdoubles.  Specifically, in
    both cases, the values seem to go through float64 conversion on the way,
    so to convert better, we need to split into float64s and sum up the
    result.

    Parameters
    ----------
    val : int
        Integer value
    flt_type : object
        numpy floating point type

    Returns
    -------
    f : numpy scalar
        of type `flt_type`
    """
    if flt_type is not np.longdouble:
        return flt_type(val)
    # Accumulate the value in float64-sized pieces
    faval = np.longdouble(0)
    while val != 0:
        f64 = np.float64(val)
        faval += f64
        val -= int(f64)
    return faval


def floor_exact(val, flt_type):
    """ Return nearest exact integer <= `val` in float type `flt_type`

    Parameters
    ----------
    val : int
        We have to pass val as an int rather than the floating point type
        because large integers cast as floating point may be rounded by the
        casting process.
    flt_type : numpy type
        numpy float type.

    Returns
    -------
    floor_val : object
        value of floating point type `flt_type`, that is the nearest exact
        integer in this type such that `floor_val` <= `val`.  Thus if `val` is
        exact in `flt_type`, `floor_val` == `val`.  Can be +/- inf when `val`
        is outside the range of `flt_type`.

    Examples
    --------
    Obviously 2 is within the range of representable integers for float32

    >>> floor_exact(2, np.float32) == 2
    True

    As is 2**24-1 (the number of significand digits is 23 + 1 implicit)

    >>> floor_exact(2**24-1, np.float32) == 2**24-1
    True

    But 2**24+1 gives a number that float32 can't represent exactly

    >>> floor_exact(2**24+1, np.float32) == 2**24
    True

    As for the numpy floor function, negatives floor towards -inf

    >>> floor_exact(-2**24-1, np.float32) == -2**24-2
    True
    """
    val = int(val)
    flt_type = np.dtype(flt_type).type
    sign = 1 if val > 0 else -1
    try:  # int_to_float deals with longdouble safely
        fval = int_to_float(val, flt_type)
    except OverflowError:
        return sign * np.inf
    if not np.isfinite(fval):
        return fval
    info = type_info(flt_type)
    diff = val - as_int(fval)
    if diff >= 0:  # floating point value <= val
        return fval
    # Float casting made the value go up
    biggest_gap = 2 ** (floor_log2(val) - info['nmant'])
    assert biggest_gap > 1
    fval -= flt_type(biggest_gap)
    return fval


def ceil_exact(val, flt_type):
    """ Return nearest exact integer >= `val` in float type `flt_type`

    Parameters
    ----------
    val : int
        We have to pass val as an int rather than the floating point type
        because large integers cast as floating point may be rounded by the
        casting process.
    flt_type : numpy type
        numpy float type.

    Returns
    -------
    ceil_val : object
        value of floating point type `flt_type`, that is the nearest exact
        integer in this type such that `ceil_val` >= `val`.  Thus if `val` is
        exact in `flt_type`, `ceil_val` == `val`.

    Examples
    --------
    Obviously 2 is within the range of representable integers for float32

    >>> ceil_exact(2, np.float32) == 2
    True

    As is 2**24-1 (the number of significand digits is 23 + 1 implicit)

    >>> ceil_exact(2**24-1, np.float32) == 2**24-1
    True

    But 2**24+1 gives a number that float32 can't represent exactly

    >>> ceil_exact(2**24+1, np.float32) == 2**24+2
    True

    As for the numpy ceil function, negatives ceil towards inf

    >>> ceil_exact(-2**24-1, np.float32) == -2**24
    True
    """
    return -floor_exact(-val, flt_type)


def int_abs(arr):
    """ Absolute values of array taking care of max negative int values

    Parameters
    ----------
    arr : array-like

    Returns
    -------
    abs_arr : array
        array the same shape as `arr` in which all negative numbers have been
        changed to positive numbers with the magnitude.  Signed integer input
        gives output of the unsigned type of the same width.

    Examples
    --------
    This kind of thing is confusing in base numpy:

    >>> import numpy as np
    >>> int(np.abs(np.int8(-128)))
    -128

    ``int_abs`` fixes that:

    >>> int(int_abs(np.int8(-128)))
    128
    >>> int_abs(np.array([-128, 127], dtype=np.int8))
    array([128, 127], dtype=uint8)
    >>> int_abs(np.array([-128, 127], dtype=np.float32)).tolist()
    [128.0, 127.0]
    """
    # np.asarray avoids a copy when possible; np.array(..., copy=False)
    # raises in numpy >= 2.0 whenever a copy is unavoidable (e.g. scalars)
    arr = np.asarray(arr)
    dt = arr.dtype
    if dt.kind == 'u':
        return arr
    if dt.kind != 'i':
        return np.absolute(arr)
    # Unsigned type of same width and byte order holds abs(min) exactly
    out = arr.astype(np.dtype(dt.str.replace('i', 'u')))
    return np.choose(arr < 0, (arr, arr * -1), out=out)


def floor_log2(x):
    """ floor of log2 of abs(`x`)

    Embarrassingly, from http://en.wikipedia.org/wiki/Binary_logarithm

    Parameters
    ----------
    x : int

    Returns
    -------
    L : None or int
        floor of base 2 log of `x`.  None if `x` == 0.

    Examples
    --------
    >>> floor_log2(2**9+1)
    9
    >>> floor_log2(-2**9+1)
    8
    >>> floor_log2(0.5)
    -1
    >>> floor_log2(0) is None
    True
    """
    ip = 0
    rem = abs(x)
    if rem > 1:
        while rem >= 2:
            ip += 1
            rem //= 2
        return ip
    elif rem == 0:
        return None
    while rem < 1:
        ip -= 1
        rem *= 2
    return ip


def best_float():
    """ Floating point type with best precision

    This is nearly always np.longdouble, except on Windows, where
    np.longdouble is Intel80 storage, but with float64 precision for
    calculations.  In that case we return float64 on the basis it's the
    fastest and smallest at the highest precision.

    Returns
    -------
    best_type : numpy type
        floating point type with highest precision
    """
    if (type_info(np.longdouble)['nmant'] > type_info(np.float64)['nmant']
            and machine() != 'sparc64'):  # sparc has crazy-slow float128
        return np.longdouble
    return np.float64


def have_binary128():
    """ True if we have a binary128 IEEE longdouble
    """
    ti = type_info(np.longdouble)
    return (ti['nmant'], ti['maxexp']) == (112, 16384)


def ok_floats():
    """ Return floating point types sorted by precision

    Remove longdouble if it has no higher precision than float64
    """
    floats = sorted(_sctypes('float'), key=lambda f: type_info(f)['nmant'])
    if best_float() != np.longdouble and np.longdouble in floats:
        floats.remove(np.longdouble)
    return floats


OK_FLOATS = ok_floats()


def able_int_type(values):
    """ Find the smallest integer numpy type to contain sequence `values`

    Prefers uint to int if minimum is >= 0

    Parameters
    ----------
    values : sequence
        sequence of integer values

    Returns
    -------
    itype : None or numpy type
        numpy integer type or None if no integer type holds all `values`

    Examples
    --------
    >>> able_int_type([0, 1]) == np.uint8
    True
    >>> able_int_type([-1, 1]) == np.int8
    True
    """
    # Any fractional part rules out every integer type
    if any(v % 1 for v in values):
        return None
    mn = min(values)
    mx = max(values)
    if mn >= 0:
        for ityp in _sctypes('uint'):
            if mx <= np.iinfo(ityp).max:
                return ityp
    for ityp in _sctypes('int'):
        info = np.iinfo(ityp)
        if mn >= info.min and mx <= info.max:
            return ityp
    return None


def ulp(val=np.float64(1.0)):
    """ Return gap between `val` and nearest representable number of same type

    This is the value of a unit in the last place (ULP), and is similar in
    meaning to the MATLAB eps function.

    Parameters
    ----------
    val : scalar, optional
        scalar value of any numpy type.  Default is 1.0 (float64)

    Returns
    -------
    ulp_val : scalar
        gap between `val` and nearest representable number of same type.
        NaN if `val` is not finite; 1 if `val` is of integer type.

    Notes
    -----
    The wikipedia article on machine epsilon points out that the term
    *epsilon* can be used in the sense of a unit in the last place (ULP), or
    as the maximum relative rounding error.  The MATLAB ``eps`` function uses
    the ULP meaning, but this function is ``ulp`` rather than ``eps`` to avoid
    confusion between different meanings of *eps*.
    """
    val = np.array(val)
    if not np.isfinite(val):
        return np.nan
    if val.dtype.kind in 'iu':
        return 1
    aval = np.abs(val)
    info = type_info(val.dtype)
    fl2 = floor_log2(aval)
    if fl2 is None or fl2 < info['minexp']:  # subnormal
        fl2 = info['minexp']
    # 'nmant' value does not include implicit first bit
    return 2 ** (fl2 - info['nmant'])