diff --git a/accelerate.cabal b/accelerate.cabal index 8a2d3d736..63d91665c 100644 --- a/accelerate.cabal +++ b/accelerate.cabal @@ -251,12 +251,8 @@ flag debug * @exp-sharing@: Enable sharing recovery of scalar expressions (True). . * @fusion@: Enable array fusion (True). - . - * @simplify@: Enable program simplification phase (True). . * @inplace@: Enable in-place array updates (True). - . - * @flush-cache@: Clear any persistent caches on program startup (False). . * @force-recomp@: Force recompilation of array programs (False). . @@ -313,17 +309,18 @@ flag debug flag bounds-checks manual: True default: True - description: Enable bounds checking + description: Enable bounds checking in the interpreter -flag unsafe-checks - manual: True - default: False - description: Enable bounds checking in unsafe operations +-- This flag is currently completely unused, so let's not expose it to users +-- flag unsafe-checks +-- manual: True +-- default: False +-- description: Enable bounds checking in unsafe operations flag internal-checks manual: True default: False - description: Enable internal consistency checks + description: Enable some internal consistency checks -- Enabling this drastically increases build times -- See: https://gitlab.haskell.org/ghc/ghc/issues/15751 @@ -621,9 +618,9 @@ library cpp-options: -DACCELERATE_BOUNDS_CHECKS - if flag(unsafe-checks) - cpp-options: - -DACCELERATE_UNSAFE_CHECKS + -- if flag(unsafe-checks) + -- cpp-options: + -- -DACCELERATE_UNSAFE_CHECKS if flag(internal-checks) cpp-options: diff --git a/cbits/flags.c b/cbits/flags.c index 7ecd4486f..cd29901b4 100644 --- a/cbits/flags.c +++ b/cbits/flags.c @@ -30,25 +30,39 @@ #include "getopt.h" -/* These globals will be accessed from the Haskell side to implement the - * corresponding behaviour. - */ +/* SEE: [layout of command line options bitfield] + * There are 7 default-enabled options, followed by 1 default-disabled option, + * followed by 17 debug options. 
+ * Note the bit trick: ((1 << n) - 1) is a number with the lowest n bits set. */ +static const uint32_t def_enabled_opts_bitfield = { (1<<7) - 1 }; +#ifndef ACCELERATE_DEBUG +static const uint32_t debug_opts_bitfield = { ((1<<17) - 1) << (7+1) }; +#endif +static const int disable_opts_offset = (7+1+17); -__flags_t __cmd_line_flags = { 0xff }; // SEE: [layout of command line options bitfield] -uint32_t __unfolding_use_threshold = 1; -uint32_t __max_simplifier_iterations = 25; +/* This global is accessed from the Haskell side. */ +__flags_t __cmd_line_flags = { def_enabled_opts_bitfield }; enum { OPT_ENABLE = 1, OPT_DISABLE, - OPT_UNFOLDING_USE_THRESHOLD, - OPT_MAX_SIMPLIFIER_ITERATIONS }; /* NOTE: [layout of command line options bitfield] * - * When adding new options, make sure the offset value in the OPT_DISABLE branch - * is updated, and that the flags are kept in order. + * HERE BE DRAGONS. + * + * When adding, removing, reordering, or changing options in ANY way, be aware + * of the following: + * - Various code relies on the fact (by bit hacks) that these options come in + * this order: -f enablers, -d enablers, -f disablers. + * - The -f enablers and -f disablers lists must be exactly the same, including + * the order. + * - The order of the options in __flags_t in flags.h must also be the same. + * - Data.Array.Accelerate.Debug.Internal.Flags contains 2 blocks of code + * hard-coding offsets into this options list. + * - Some metrics about this options list used in the bit hacks are at the top + * of this file (def_enabled_opts_bitfield etc.). 
*/ static const char* shortopts = ""; static const struct option longopts[] = @@ -56,11 +70,9 @@ static const struct option longopts[] = , { "facc-sharing", no_argument, NULL, OPT_ENABLE } , { "fexp-sharing", no_argument, NULL, OPT_ENABLE } , { "ffusion", no_argument, NULL, OPT_ENABLE } - , { "fsimplify", no_argument, NULL, OPT_ENABLE } , { "finplace", no_argument, NULL, OPT_ENABLE } , { "ffast-math", no_argument, NULL, OPT_ENABLE } , { "ffast-permute-const", no_argument, NULL, OPT_ENABLE } - , { "fflush-cache", no_argument, NULL, OPT_ENABLE } , { "fforce-recomp", no_argument, NULL, OPT_ENABLE } , { "ddebug", no_argument, NULL, OPT_ENABLE } @@ -85,15 +97,13 @@ static const struct option longopts[] = , { "fno-acc-sharing", no_argument, NULL, OPT_DISABLE } , { "fno-exp-sharing", no_argument, NULL, OPT_DISABLE } , { "fno-fusion", no_argument, NULL, OPT_DISABLE } - , { "fno-simplify", no_argument, NULL, OPT_DISABLE } , { "fno-inplace", no_argument, NULL, OPT_DISABLE } , { "fno-fast-math", no_argument, NULL, OPT_DISABLE } , { "fno-fast-permute-const", no_argument, NULL, OPT_DISABLE } - , { "fno-flush-cache", no_argument, NULL, OPT_DISABLE } , { "fno-force-recomp", no_argument, NULL, OPT_DISABLE } - , { "funfolding-use-threshold=INT", required_argument, NULL, OPT_UNFOLDING_USE_THRESHOLD } - , { "fmax-simplifier-iterations=INT", required_argument, NULL, OPT_MAX_SIMPLIFIER_ITERATIONS } + /* There were options that took arguments here before; see the git blame of + * this comment for how that looked. 
+ */ /* required sentinel */ , { NULL, 0, NULL, 0 } @@ -126,28 +136,7 @@ static void parse_options(int argc, char *argv[]) break; case OPT_DISABLE: - __cmd_line_flags.bitfield &= ~(1 << (longindex - 27)); // SEE: [layout of command line options bitfield] - break; - - /* attempt to decode the argument to flags which require them */ - case OPT_UNFOLDING_USE_THRESHOLD: - if (1 != sscanf(optarg, "%"PRIu32, &__unfolding_use_threshold)) { - fprintf(stderr, "%s: option `-%s' requires an integer argument, but got: %s\n" - , basename(argv[0]) - , longopts[longindex].name - , optarg - ); - } - break; - - case OPT_MAX_SIMPLIFIER_ITERATIONS: - if (1 != sscanf(optarg, "%"PRIu32, &__max_simplifier_iterations)) { - fprintf(stderr, "%s: option `-%s' requires an integer argument, but got: %s\n" - , basename(argv[0]) - , longopts[longindex].name - , optarg - ); - } + __cmd_line_flags.bitfield &= ~(1 << (longindex - disable_opts_offset)); break; /* option was ambiguous or was missing a required argument @@ -196,7 +185,7 @@ static void parse_options(int argc, char *argv[]) } } #if !defined(ACCELERATE_DEBUG) - if (__cmd_line_flags.bitfield & 0x7fffc00) { // SEE: [layout of command line options bitfield] + if (__cmd_line_flags.bitfield & debug_opts_bitfield) { fprintf(stderr, "Data.Array.Accelerate: Debugging options are disabled.\n"); fprintf(stderr, "Reinstall package 'accelerate' with '-fdebug' to enable them.\n"); } diff --git a/cbits/flags.h b/cbits/flags.h index a355b7684..7e64b89b0 100644 --- a/cbits/flags.h +++ b/cbits/flags.h @@ -11,6 +11,8 @@ #ifndef __ACCELERATE_FLAGS_H__ #define __ACCELERATE_FLAGS_H__ +#include <stdint.h> + /* NOTE: [layout of command line options bitfield] */ typedef union { @@ -21,11 +23,9 @@ typedef union { uint32_t acc_sharing : 1; uint32_t exp_sharing : 1; uint32_t fusion : 1; - uint32_t simplify : 1; uint32_t inplace : 1; uint32_t fast_math : 1; uint32_t fast_permute_const : 1; - uint32_t flush_cache : 1; uint32_t force_recomp : 1; uint32_t debug : 1; diff --git 
a/src/Data/Array/Accelerate/Debug/Internal.hs b/src/Data/Array/Accelerate/Debug/Internal.hs index e0617ff99..08eaf8899 100644 --- a/src/Data/Array/Accelerate/Debug/Internal.hs +++ b/src/Data/Array/Accelerate/Debug/Internal.hs @@ -18,7 +18,7 @@ module Data.Array.Accelerate.Debug.Internal ( debuggingIsEnabled, boundsChecksAreEnabled, - unsafeChecksAreEnabled, + -- unsafeChecksAreEnabled, internalChecksAreEnabled, module Debug, @@ -50,13 +50,13 @@ boundsChecksAreEnabled = True boundsChecksAreEnabled = False #endif -{-# INLINE unsafeChecksAreEnabled #-} -unsafeChecksAreEnabled :: Bool -#ifdef ACCELERATE_UNSAFE_CHECKS -unsafeChecksAreEnabled = True -#else -unsafeChecksAreEnabled = False -#endif +-- {-# INLINE unsafeChecksAreEnabled #-} +-- unsafeChecksAreEnabled :: Bool +-- #ifdef ACCELERATE_UNSAFE_CHECKS +-- unsafeChecksAreEnabled = True +-- #else +-- unsafeChecksAreEnabled = False +-- #endif {-# INLINE internalChecksAreEnabled #-} internalChecksAreEnabled :: Bool diff --git a/src/Data/Array/Accelerate/Debug/Internal/Flags.hs b/src/Data/Array/Accelerate/Debug/Internal/Flags.hs index dca1bea4e..59cb8f0c7 100644 --- a/src/Data/Array/Accelerate/Debug/Internal/Flags.hs +++ b/src/Data/Array/Accelerate/Debug/Internal/Flags.hs @@ -21,13 +21,11 @@ module Data.Array.Accelerate.Debug.Internal.Flags ( Value, - unfolding_use_threshold, - max_simplifier_iterations, getValue, setValue, Flag(..), - seq_sharing, acc_sharing, exp_sharing, array_fusion, simplify, inplace, flush_cache, force_recomp, + seq_sharing, acc_sharing, exp_sharing, array_fusion, inplace, force_recomp, fast_math, fast_permute_const, debug, verbose, dump_phases, dump_sharing, dump_fusion, dump_simpl_stats, dump_simpl_iterations, dump_vectorisation, dump_dot, dump_simpl_dot, dump_gc, dump_gc_stats, dump_cc, dump_ld, dump_asm, dump_exec, @@ -63,6 +61,10 @@ newtype Value = Value (Ptr Word32) -- see flags.c -- bits for other configuration options, not controlled by the command line -- flags. 
-- +-- However, as there are currently no such special configuration options, the +-- bit hack complexity here is unnecessary. It's kept as an easter egg for +-- future maintainers. +-- instance Enum Flag where toEnum = Flag fromEnum (Flag x) = x @@ -75,29 +77,27 @@ instance Show Flag where 1 -> "acc-sharing" 2 -> "exp-sharing" 3 -> "fusion" - 4 -> "simplify" - 5 -> "inplace" - 6 -> "fast-math" - 7 -> "fast-permute-const" - 8 -> "flush_cache" - 9 -> "force-recomp" - 10 -> "debug" - 11 -> "verbose" - 12 -> "dump-phases" - 13 -> "dump-sharing" - 14 -> "dump-fusion" - 15 -> "dump-simpl-stats" - 16 -> "dump-simpl-iterations" - 17 -> "dump-vectorisation" - 18 -> "dump-dot" - 19 -> "dump-simpl-dot" - 20 -> "dump-gc" - 21 -> "dump-gc-stats" - 22 -> "dump-cc" - 23 -> "dump-ld" - 24 -> "dump-asm" - 25 -> "dump-exec" - 26 -> "dump-sched" + 4 -> "inplace" + 5 -> "fast-math" + 6 -> "fast-permute-const" + 7 -> "force-recomp" + 8 -> "debug" + 9 -> "verbose" + 10 -> "dump-phases" + 11 -> "dump-sharing" + 12 -> "dump-fusion" + 13 -> "dump-simpl-stats" + 14 -> "dump-simpl-iterations" + 15 -> "dump-vectorisation" + 16 -> "dump-dot" + 17 -> "dump-simpl-dot" + 18 -> "dump-gc" + 19 -> "dump-gc-stats" + 20 -> "dump-cc" + 21 -> "dump-ld" + 22 -> "dump-asm" + 23 -> "dump-exec" + 24 -> "dump-sched" _ -> show x -- | Conditional execution of a monadic debugging expression. 
@@ -178,56 +178,44 @@ clearFlags = mapM_ clearFlag -- foreign import ccall "&__cmd_line_flags" __cmd_line_flags :: Ptr Word32 --- These @-f=INT@ values are used by the compiler --- -foreign import ccall "&__unfolding_use_threshold" unfolding_use_threshold :: Value -- the magic cut-off figure for inlining -foreign import ccall "&__max_simplifier_iterations" max_simplifier_iterations :: Value -- maximum number of scalar simplification passes - #else __cmd_line_flags :: Ptr Word32 __cmd_line_flags = undefined -unfolding_use_threshold :: Value -unfolding_use_threshold = undefined - -max_simplifier_iterations :: Value -max_simplifier_iterations = undefined - #endif -- These @-f@ flags can be reversed with @-fno-@ -- +-- SEE: [layout of command line options bitfield] seq_sharing = Flag 0 -- recover sharing of sequence expressions acc_sharing = Flag 1 -- recover sharing of array computations exp_sharing = Flag 2 -- recover sharing of scalar expressions array_fusion = Flag 3 -- fuse array expressions -simplify = Flag 4 -- simplify scalar expressions -inplace = Flag 5 -- allow (safe) in-place array updates -fast_math = Flag 6 -- use faster, less precise math library operations -fast_permute_const = Flag 7 -- allow non-atomic permute const for product types -flush_cache = Flag 8 -- delete persistent compilation cache(s) -force_recomp = Flag 9 -- force recompilation of array programs +inplace = Flag 4 -- allow (safe) in-place array updates +fast_math = Flag 5 -- use faster, less precise math library operations +fast_permute_const = Flag 6 -- allow non-atomic permute const for product types +force_recomp = Flag 7 -- force recompilation of array programs -- These debugging flags are disable by default and are enabled with @-d@ -- -debug = Flag 10 -- compile code with debugging symbols (-g) -verbose = Flag 11 -- be very chatty -dump_phases = Flag 12 -- print information about each phase of the compiler -dump_sharing = Flag 13 -- sharing recovery phase -dump_fusion = Flag 14 -- 
array fusion phase -dump_simpl_stats = Flag 15 -- statistics form fusion/simplification -dump_simpl_iterations = Flag 16 -- output from each simplifier iteration -dump_vectorisation = Flag 17 -- output from the vectoriser -dump_dot = Flag 18 -- generate dot output of the program -dump_simpl_dot = Flag 19 -- generate simplified dot output -dump_gc = Flag 20 -- trace garbage collector -dump_gc_stats = Flag 21 -- print final GC statistics -dump_cc = Flag 22 -- trace code generation & compilation -dump_ld = Flag 23 -- trace runtime linker -dump_asm = Flag 24 -- trace assembler -dump_exec = Flag 25 -- trace execution -dump_sched = Flag 26 -- trace scheduler +debug = Flag 8 -- compile code with debugging symbols (-g) +verbose = Flag 9 -- be very chatty +dump_phases = Flag 10 -- print information about each phase of the compiler +dump_sharing = Flag 11 -- sharing recovery phase +dump_fusion = Flag 12 -- array fusion phase +dump_simpl_stats = Flag 13 -- statistics from fusion/simplification +dump_simpl_iterations = Flag 14 -- output from each simplifier iteration +dump_vectorisation = Flag 15 -- output from the vectoriser +dump_dot = Flag 16 -- generate dot output of the program +dump_simpl_dot = Flag 17 -- generate simplified dot output +dump_gc = Flag 18 -- trace garbage collector +dump_gc_stats = Flag 19 -- print final GC statistics +dump_cc = Flag 20 -- trace code generation & compilation +dump_ld = Flag 21 -- trace runtime linker +dump_asm = Flag 22 -- trace assembler +dump_exec = Flag 23 -- trace execution +dump_sched = Flag 24 -- trace scheduler -- Note: [linking to .c files] diff --git a/src/Data/Array/Accelerate/Error.hs b/src/Data/Array/Accelerate/Error.hs index 3f00a6b5c..5ce86630b 100644 --- a/src/Data/Array/Accelerate/Error.hs +++ b/src/Data/Array/Accelerate/Error.hs @@ -128,7 +128,7 @@ ppCallStack = ppLines {-# INLINE doChecks #-} doChecks :: Check -> Bool doChecks Bounds = doBoundsChecks -doChecks Unsafe = doUnsafeChecks +doChecks Unsafe = internalError "If 
you want to do unsafe checks, re-enable the unsafe-checks cabal flag. It is currently disabled because there are no unsafe operation checks." doChecks Internal = doInternalChecks doBoundsChecks :: Bool @@ -138,12 +138,12 @@ doBoundsChecks = True doBoundsChecks = False #endif -doUnsafeChecks :: Bool -#ifdef ACCELERATE_UNSAFE_CHECKS -doUnsafeChecks = True -#else -doUnsafeChecks = False -#endif +-- doUnsafeChecks :: Bool +-- #ifdef ACCELERATE_UNSAFE_CHECKS +-- doUnsafeChecks = True +-- #else +-- doUnsafeChecks = False +-- #endif doInternalChecks :: Bool #ifdef ACCELERATE_INTERNAL_CHECKS diff --git a/src/Data/Array/Accelerate/Trafo/Config.hs b/src/Data/Array/Accelerate/Trafo/Config.hs index 4b0a26e74..40667e090 100644 --- a/src/Data/Array/Accelerate/Trafo/Config.hs +++ b/src/Data/Array/Accelerate/Trafo/Config.hs @@ -15,12 +15,8 @@ module Data.Array.Accelerate.Trafo.Config ( Flag(..), defaultOptions, - -- Other options not controlled by the command line flags - -- float_out_acc, - ) where -import Data.Bits import Data.BitSet import Data.Array.Accelerate.Debug.Internal.Flags as F @@ -30,20 +26,12 @@ import Foreign.Storable data Config = Config - { options :: {-# UNPACK #-} !(BitSet Word32 Flag) - , unfolding_use_threshold :: {-# UNPACK #-} !Int - , max_simplifier_iterations :: {-# UNPACK #-} !Int + { options :: {-# UNPACK #-} !(BitSet Word32 Flag) } deriving Show {-# NOINLINE defaultOptions #-} defaultOptions :: Config defaultOptions = unsafePerformIO $! - Config <$> (BitSet . (0x80000000 .|.)) <$> peek F.__cmd_line_flags - <*> (fromIntegral <$> F.getValue F.unfolding_use_threshold) - <*> (fromIntegral <$> F.getValue F.max_simplifier_iterations) - --- Extra options not covered by command line flags --- --- float_out_acc = Flag 31 + Config <$> BitSet <$> peek F.__cmd_line_flags