
Commit 97418ab

Complete the revision of README
1 parent 716d3c7 commit 97418ab

File tree

README.md
cabal.project.local.development
example/MnistCnnShaped2.hs

3 files changed: +82 -121 lines changed

README.md

Lines changed: 60 additions & 92 deletions
@@ -97,129 +97,102 @@ A shorthand that creates the symbolic derivative program, simplifies it and inte
 ```


-# WIP: The examples below are outdated and will be replaced soon using a new API
-
-
-## Computing Jacobians
-
--- TODO: we can have vector/matrix/tensor codomains, but not pair codomains
--- until #68 is done;
--- perhaps a vector codomain example, with a 1000x3 Jacobian, would make sense?
--- 2 years later: actually, we can now have TKProduct codomains.
-
-Now let's consider a function from `R^n` to `R^m`. We don't want the gradient, but instead the Jacobian.
-```hs
--- A function that goes from `R^3` to `R^2`.
-foo :: RealFloat a => (a,a,a) -> (a,a)
-foo (x,y,z) =
-  let w = x * sin y
-  in (atan2 z w, z * w)
-```
-TODO: show how the 2x3 Jacobian emerges from here
-
-
-
 ## Forall shapes and sizes

-An additional feature of this library is a type system for tensor shape arithmetic. The following code is a part of convolutional neural network definition, for which horde-ad computes the gradient of a shape determined by the shape of input data and initial parameters. The compiler is able to infer a lot of tensor shapes, deriving them both from dynamic dimension arguments (the first two lines of parameters to the function) and from static type-level hints. Look at this beauty.
+An additional feature of this library is a type system for tensor shape arithmetic. The following code is a part of convolutional neural network definition, for which horde-ad computes the gradient of a shape determined by the shape of input data and of initial parameters. The compiler is able to infer a lot of tensor shapes, deriving them both from dynamic dimension arguments (the first two lines of parameters to the function) and from static type-level hints. Look at this beauty.
 ```hs
 convMnistTwoS
   kh@SNat kw@SNat h@SNat w@SNat
-  c_in@SNat c_out@SNat _n_hidden@SNat batch_size@SNat
-      -- integer parameters denoting basic dimensions, with some notational noise
-  input              -- input images, shape (batch_size, c_in, h, w)
-  (ker1, bias1)      -- layer1 kernel, shape (c_out, c_in, kh+1, kw+1); and bias, shape (c_out)
-  (ker2, bias2)      -- layer2 kernel, shape (c_out, c_out, kh+1, kw+1); and bias, shape (c_out)
+  c_out@SNat _n_hidden@SNat batch_size@SNat
+      -- integer parameters denoting basic dimensions
+  input              -- input images, shape [batch_size, 1, h, w]
+  (ker1, bias1)      -- layer1 kernel, shape [c_out, 1, kh+1, kw+1]; and bias, shape [c_out]
+  (ker2, bias2)      -- layer2 kernel, shape [c_out, c_out, kh+1, kw+1]; and bias, shape [c_out]
   ( weightsDense     -- dense layer weights,
-                     --   shape (n_hidden, c_out * ((h+kh)/2 + kh)/2, ((w+kw)/2 + kw)/2)
-  , biasesDense )    -- dense layer biases, shape (n_hidden)
-  ( weightsReadout   -- readout layer weights, shape (10, n_hidden)
-  , biasesReadout )  -- readout layer biases (10)
-  =                  -- -> output classification, shape (10, batch_size)
-  let t1 = convMnistLayerS kh kw
-                           h w
-                           c_in c_out batch_size
-                           ker1 (constant input) bias1
-      t2 = convMnistLayerS kh kw
-                           (SNat @((h + kh) `Div` 2)) (SNat @((w + kw) `Div` 2))
+                     --   shape [n_hidden, c_out * (h/4) * (w/4)]
+  , biasesDense )    -- dense layer biases, shape [n_hidden]
+  ( weightsReadout   -- readout layer weights, shape [10, n_hidden]
+  , biasesReadout )  -- readout layer biases [10]
+  =                  -- -> output classification, shape [10, batch_size]
+  gcastWith (unsafeCoerceRefl :: Div (Div w 2) 2 :~: Div w 4) $
+  gcastWith (unsafeCoerceRefl :: Div (Div h 2) 2 :~: Div h 4) $
+  let t1 = convMnistLayerS kh kw h w
+                           (SNat @1) c_out batch_size
+                           ker1 (sfromPrimal input) bias1
+      t2 = convMnistLayerS kh kw (SNat @(h `Div` 2)) (SNat @(w `Div` 2))
                            c_out c_out batch_size
                            ker2 t1 bias2
-      m1 = mapOuterS reshapeS t2
-      m2 = transpose2S m1
-      denseLayer = weightsDense <>$ m2 + asColumnS biasesDense
-      denseRelu = relu denseLayer
-  in weightsReadout <>$ denseRelu + asColumnS biasesReadout
+      m1 = sreshape t2
+      denseLayer = weightsDense `smatmul2` str m1
+                   + str (sreplicate biasesDense)
+  in weightsReadout `smatmul2` reluS denseLayer
+     + str (sreplicate biasesReadout)
 ```
 But we don't just want the shapes in comments and in runtime expressions; we want them as a compiler-verified documentation in the form of the type signature of the function:
 ```hs
 convMnistTwoS
-  :: forall kh kw h w c_in c_out n_hidden batch_size d r.
-     ( 1 <= kh             -- kernel height is large enough
-     , 1 <= kw             -- kernel width is large enough
-     , ADModeAndNum d r )  -- differentiation mode and numeric type are known to the engine
-  => -- The two boilerplate lines below tie type parameters to the corresponding
-     -- value parameters (built with SNat) denoting basic dimensions.
-     SNat kh -> SNat kw -> SNat h -> SNat w
-  -> SNat c_in -> SNat c_out -> SNat n_hidden -> SNat batch_size
-  -> OS.Array '[batch_size, c_in, h, w] r
-  -> ( ADVal d (OS.Array '[c_out, c_in, kh + 1, kw + 1] r)
-     , ADVal d (OS.Array '[c_out] r ) )
-  -> ( ADVal d (OS.Array '[c_out, c_out, kh + 1, kw + 1] r)
-     , ADVal d (OS.Array '[c_out] r) )
-  -> ( ADVal d (OS.Array '[ n_hidden
-                          , c_out * (((h + kh) `Div` 2 + kh) `Div` 2)
-                                  * (((w + kw) `Div` 2 + kw) `Div` 2)
-                          ] r)
-     , ADVal d (OS.Array '[n_hidden] r) )
-  -> ( ADVal d (OS.Array '[10, n_hidden] r)
-     , ADVal d (OS.Array '[10] r) )
-  -> ADVal d (OS.Array '[10, batch_size] r)
+  :: forall kh kw h w c_out n_hidden batch_size target r.
+     ( 1 <= kh             -- kernel height is large enough
+     , 1 <= kw             -- kernel width is large enough
+     , ADReady target, GoodScalar r, Differentiable r )
+  => SNat kh -> SNat kw -> SNat h -> SNat w
+  -> SNat c_out -> SNat n_hidden -> SNat batch_size
+       -- ^ these boilerplate lines tie type parameters to the corresponding
+       --   SNat value parameters denoting basic dimensions
+  -> PrimalOf target (TKS '[batch_size, 1, h, w] r)
+  -> ( ( target (TKS '[c_out, 1, kh + 1, kw + 1] r)
+       , target (TKS '[c_out] r) )
+     , ( target (TKS '[c_out, c_out, kh + 1, kw + 1] r)
+       , target (TKS '[c_out] r) )
+     , ( target (TKS '[n_hidden, c_out * (h `Div` 4) * (w `Div` 4) ] r)
+       , target (TKS '[n_hidden] r) )
+     , ( target (TKS '[10, n_hidden] r)
+       , target (TKS '[10] r) ) )
+  -> target (TKS '[SizeMnistLabel, batch_size] r)
 ```

 The full neural network definition from which this function is taken can be found at

 https://github.com/Mikolaj/horde-ad/tree/master/example

-in file `MnistCnnShaped.hs` and the directory contains several other sample neural networks for MNIST digit classification. Among them are recurrent, convolutional and fully connected networks based on fully typed tensors (sizes of all dimensions are tracked in the types, as above) as well as weakly typed fully connected networks built with, respectively, matrices, vectors and raw scalars (working with scalars is the most flexible but slowest; all others have comparable performance on CPU).
+in file `MnistCnnShaped2.hs` and the directory contains several other sample neural networks for MNIST digit classification. Among them are recurrent, convolutional and fully connected networks based on fully typed tensors (sizes of all dimensions are tracked in the types, as above) as well as their weakly typed variants that track only the ranks of tensors. It's possible to mix the two typing styles within one function signature and even within one shape description.


 Compilation from source
 -----------------------

-Because we use [hmatrix] the OS needs libraries that on Ubuntu/Debian
-are called libgsl0-dev, liblapack-dev and libatlas-base-dev.
-See https://github.com/haskell-numerics/hmatrix/blob/master/INSTALL.md
-for information about other OSes.
-Other Haskell packages need their usual C library dependencies,
-as well, e.g., package zlib needs C library zlib1g-dev.
+The Haskell packages [we depend on](https://github.com/Mikolaj/horde-ad/blob/master/horde-ad.cabal) need their usual C library dependencies,
+e.g., package zlib needs the C library zlib1g-dev or an equivalent.
+At this time, we don't depend on any GPU hardware nor bindings.

 For development, copying the included `cabal.project.local.development`
 to `cabal.project.local` provides a sensible default to run `cabal build` with.
 For extensive testing, a command like

-    cabal test minimalTest --enable-optimization -f test_seq
+    cabal test minimalTest --enable-optimization

-ensures that the code is compiled with optimization and so executes the rather
-computation-intensive testsuites in reasonable time.
+ensures that the code is compiled with optimization and consequently
+executes the rather computation-intensive testsuites in reasonable time.


 Running tests
 -------------

-The test suite can run in parallel but, if so, the PP tests need to be disabled:
+The `parallelTest` test suite consists of large tests and runs in parallel

-    cabal test simplifiedOnlyTest --enable-optimization --test-options='-p "! /PP/"'
+    cabal test parallelTest --enable-optimization

-Parallel run may cause the extra printf messages coming from within the tests
-to be out of order. To keep your screen tidy, simply redirect `stderr`,
-e.g. via: `2>/dev/null`:
+which is likely to cause the extra printf messages coming from within
+the tests to be out of order. To keep your screen tidy, simply redirect
+`stderr`, e.g.,

-    cabal test simplifiedOnlyTest --enable-optimization --test-options='-p "! /PP/"' 2>/dev/null
+    cabal test parallelTest --enable-optimization 2>/dev/null

-You can also run the test suite sequentially and then all tests can be included
-and the extra printf messages are displayed fine most of the time:
+The remainder of the test suite is set up to run sequentially to simplify
+automatic verification of results that may vary slightly depending on
+execution order

-    cabal test simplifiedOnlyTest --enable-optimization -f test_seq
+    cabal test CAFlessTest --enable-optimization


 Coding style
@@ -233,8 +206,8 @@ Spaces around arithmetic operators encouraged.
 Generally, relax and try to stick to the style apparent in a file
 you are editing. Put big formatting changes in separate commits.

-Haddocks should be provided for all module headers and for all functions
-and types, or at least main sections, from the most important modules.
+Haddocks should be provided for all module headers and for the main
+functions and types from the most important modules.
 Apart of that, only particularly significant functions and types
 are distinguished by having a haddock. If minor ones have comments,
 they should not be haddocks and they are permitted to describe
@@ -245,11 +218,6 @@ of comments, unless too verbose.
 Copyright
 ---------

-Copyright 2023 Mikolaj Konarski, Well-Typed LLP and others (see git history)
+Copyright 2023--2025 Mikolaj Konarski, Well-Typed LLP and others (see git history)

 License: BSD-3-Clause (see file LICENSE)
-
-
-
-[hmatrix]: https://hackage.haskell.org/package/hmatrix
-[orthotope]: https://hackage.haskell.org/package/orthotope
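The "Computing Jacobians" stub removed above ends with a TODO about showing how the 2x3 Jacobian of `foo` emerges. As a side note only, here is a minimal sketch of that idea using hand-rolled forward-mode dual numbers; it does not use horde-ad's API, and the helper names are made up for illustration. One forward sweep along each unit input direction produces one column of the 2x3 Jacobian.

```hs
-- A (value, directional derivative) pair, propagated forward through foo.
type Dual = (Double, Double)

mulD :: Dual -> Dual -> Dual
mulD (u, u') (v, v') = (u * v, u' * v + u * v')

sinD :: Dual -> Dual
sinD (u, u') = (sin u, cos u * u')

atan2D :: Dual -> Dual -> Dual
atan2D (y, y') (x, x') = (atan2 y x, (x * y' - y * x') / (x * x + y * y))

-- The same computation as `foo (x,y,z) = let w = x * sin y in (atan2 z w, z * w)`.
fooD :: Dual -> Dual -> Dual -> (Dual, Dual)
fooD x y z =
  let w = x `mulD` sinD y
  in (atan2D z w, z `mulD` w)

-- One forward sweep per input direction yields one column of the 2x3 Jacobian.
jacobianColumn :: (Double, Double, Double) -> (Double, Double, Double) -> (Double, Double)
jacobianColumn (x, y, z) (dx, dy, dz) =
  let ((_, d1), (_, d2)) = fooD (x, dx) (y, dy) (z, dz)
  in (d1, d2)

main :: IO ()
main = mapM_ (print . jacobianColumn (1.1, 2.2, 3.3))
             [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
```

Running `main` prints the three columns at a sample point; a reverse-mode tool would instead obtain the two rows, one sweep per output.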

cabal.project.local.development

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
-ignore-project: False
-tests: True
-benchmarks: True
+package horde-ad
+  tests: True
+  benchmarks: True
+
 test-show-details: direct
 optimization: False
-

example/MnistCnnShaped2.hs

Lines changed: 18 additions & 25 deletions
@@ -43,10 +43,8 @@ convMnistLayerS
      ( 1 <= kh
      , 1 <= kw  -- wrongly reported as redundant due to plugins
      , ADReady target, GoodScalar r, Differentiable r )
-  => SNat kh -> SNat kw
-  -> SNat h -> SNat w
-  -> SNat c_in -> SNat c_out
-  -> SNat batch_size
+  => SNat kh -> SNat kw -> SNat h -> SNat w
+  -> SNat c_in -> SNat c_out -> SNat batch_size
   -> target (TKS '[c_out, c_in, kh + 1, kw + 1] r)
   -> target (TKS '[batch_size, c_in, h, w] r)
   -> target (TKS '[c_out] r)
@@ -63,43 +61,38 @@ convMnistLayerS SNat SNat SNat SNat SNat SNat SNat

 convMnistTwoS
   :: forall kh kw h w c_out n_hidden batch_size target r.
-     -- @h@ and @w@ are fixed for MNIST, but may be different, e.g., in tests
-     ( 1 <= kh  -- kernel height is large enough
-     , 1 <= kw  -- kernel width is large enough
+     -- @h@ and @w@ are fixed with MNIST data, but not with test data
+     ( 1 <= kh  -- kernel height is large enough
+     , 1 <= kw  -- kernel width is large enough
      , ADReady target, GoodScalar r, Differentiable r )
-  => SNat kh -> SNat kw
-  -> SNat h -> SNat w
+  => SNat kh -> SNat kw -> SNat h -> SNat w
   -> SNat c_out -> SNat n_hidden -> SNat batch_size
        -- ^ these boilerplate lines tie type parameters to the corresponding
-       --   value parameters (@SNat@ below) denoting basic dimensions
+       --   SNat value parameters denoting basic dimensions
   -> PrimalOf target (TKS '[batch_size, 1, h, w] r)  -- ^ input images
   -> ADCnnMnistParametersShaped target h w kh kw c_out n_hidden r
-  -> target (TKS '[SizeMnistLabel, batch_size] r)  -- ^ classification
-convMnistTwoS kh@SNat kw@SNat
-              h@SNat w@SNat
+       -- ^ parameters
+  -> target (TKS '[SizeMnistLabel, batch_size] r)  -- ^ output classification
+convMnistTwoS kh@SNat kw@SNat h@SNat w@SNat
               c_out@SNat _n_hidden@SNat batch_size@SNat
               input
               ( (ker1, bias1), (ker2, bias2)
               , (weightsDense, biasesDense), (weightsReadout, biasesReadout) ) =
   gcastWith (unsafeCoerceRefl :: Div (Div w 2) 2 :~: Div w 4) $
   gcastWith (unsafeCoerceRefl :: Div (Div h 2) 2 :~: Div h 4) $
-  let t1 = convMnistLayerS kh kw
-                           h w
+  let t1 = convMnistLayerS kh kw h w
                            (SNat @1) c_out batch_size
                            ker1 (sfromPrimal input) bias1
-      t2 :: target (TKS '[batch_size, c_out, h `Div` 4, w `Div` 4] r)
-      t2 = convMnistLayerS kh kw
-                           (SNat @(h `Div` 2)) (SNat @(w `Div` 2))
+      -- t2 :: target (TKS '[batch_size, c_out, h `Div` 4, w `Div` 4] r)
+      t2 = convMnistLayerS kh kw (SNat @(h `Div` 2)) (SNat @(w `Div` 2))
                            c_out c_out batch_size
                            ker2 t1 bias2
-      m1 :: target (TKS '[batch_size, c_out * (h `Div` 4) * (w `Div` 4)] r)
+      -- m1 :: target (TKS '[batch_size, c_out * (h `Div` 4) * (w `Div` 4)] r)
       m1 = sreshape t2
-      m2 = str m1
-      denseLayer = weightsDense `smatmul2` m2
-                   + str (sreplicate {-@batch_size-} biasesDense)
-      denseRelu = reluS denseLayer
-  in weightsReadout `smatmul2` denseRelu
-     + str (sreplicate {-@batch_size-} biasesReadout)
+      denseLayer = weightsDense `smatmul2` str m1
+                   + str (sreplicate biasesDense)
+  in weightsReadout `smatmul2` reluS denseLayer
+     + str (sreplicate biasesReadout)

 convMnistLossFusedS
   :: forall kh kw h w c_out n_hidden batch_size target r.
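A note on the `gcastWith (unsafeCoerceRefl :: Div (Div w 2) 2 :~: Div w 4)` lines in the diff above: GHC's solver cannot prove such type-level `Div` identities for an abstract natural, so the equality is asserted by the programmer and brought into scope with `gcastWith`. Below is a minimal self-contained sketch of that idiom, assuming only base; `halveTwice` is a made-up example, not horde-ad code.

```hs
{-# LANGUAGE DataKinds, KindSignatures, ScopedTypeVariables, TypeOperators #-}
import Data.Type.Equality
import GHC.TypeLits (Div, Nat)
import Unsafe.Coerce (unsafeCoerce)

-- Assert an equality between types that GHC cannot derive on its own;
-- the burden of correctness is entirely on the programmer.
unsafeCoerceRefl :: a :~: b
unsafeCoerceRefl = unsafeCoerce Refl

-- Use a value indexed by Div (Div n 2) 2 where one indexed by Div n 4 is expected,
-- mirroring the "halve twice equals divide by four" assertions in the commit.
halveTwice :: forall (n :: Nat) f r. f (Div (Div n 2) 2) -> (f (Div n 4) -> r) -> r
halveTwice x k =
  gcastWith (unsafeCoerceRefl :: Div (Div n 2) 2 :~: Div n 4) $
  k x

main :: IO ()
main = putStrLn "halveTwice type-checks: Div (Div n 2) 2 asserted equal to Div n 4"
```

Type-checker plugins can prove some such equalities automatically (the source comment about constraints "wrongly reported as redundant due to plugins" alludes to them); here the code uses an explicit unsafe assertion instead.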
