rename: s/OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint/OutputStageQuantizeDownInt32ByFixedPoint/g - and keep the old name supported for compatibility

bjacob · bjacob · commit 15a66adeb1bc · 2018-02-14T13:51:25.000-05:00
diff --git a/doc/quantization.md b/doc/quantization.md
@@ -301,7 +301,7 @@ the particular quantization paradigm that we detailed above in this document.
 The specific output pipeline stage implementing the present quantization
 paradigm, i.e. implementing the precise computation detailed in the previous
 section (equation (5)), is
-`OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint`.
+`OutputStageQuantizeDownInt32ByFixedPoint`.
 
 Please refer to the comment explaining it in
 [public/output_stages.h](../public/output_stages.h).
@@ -313,7 +313,7 @@ The difference between the older legacy quantization paradigm described in
 document boils down to the difference between the legacy output stage
 implementing it, `OutputStageQuantizeDownInt32ToUint8Scale`, and the new output
 stage implementing the new paradigm,
-`OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint`.
+`OutputStageQuantizeDownInt32ByFixedPoint`.
 
 Please refer to the comments in
 [public/output_stages.h](../public/output_stages.h) for details about these two
@@ -323,7 +323,7 @@ Issues with the old output stage `OutputStageQuantizeDownInt32ToUint8Scale` are:
 
 1.  The int32 accumulators (inputs to the output stage) undergo a plain int32
     multiplication with a int32 multiplier, which may overflow. By contrast, in
-    the newer `OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint`, this
+    the newer `OutputStageQuantizeDownInt32ByFixedPoint`, this
     integer multiplication becomes a fixed-point multiplication and cannot
     overflow.
 
diff --git a/doc/quantization_example.cc b/doc/quantization_example.cc
@@ -201,7 +201,7 @@ std::ostream& operator<<(std::ostream& s,
 //
 // This is how to obtain the fixed-point multiplier and right shift
 // parameters to pass to
-// OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint.
+// OutputStageQuantizeDownInt32ByFixedPoint.
 //
 // Note: all this code only needs to run offline to generate the quantized
 // neural network workload, not at runtime on the
@@ -347,7 +347,7 @@ int main() {
             << "use quantized arithmetic.\n"
             << std::endl;
 
-  gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint
+  gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint
       quantize_down_stage;
   quantize_down_stage.result_offset_after_shift = result_offset;
   quantize_down_stage.result_fixedpoint_multiplier = quantized_multiplier;
diff --git a/internal/output.h b/internal/output.h
@@ -119,12 +119,12 @@ struct OutputStageEvalImpl<OutputStageQuantizeDownInt32ToUint8ScalePC<Shape>,
 
 template <int Size>
 struct OutputStageEvalBufferImpl<
-    OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint,
+    OutputStageQuantizeDownInt32ByFixedPoint,
     RegisterBuffer<std::int32_t, Size>> {
   typedef RegisterBuffer<std::int32_t, Size> InputType;
   typedef RegisterBuffer<std::int32_t, Size> OutputType;
 
-  typedef OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint OutputStage;
+  typedef OutputStageQuantizeDownInt32ByFixedPoint OutputStage;
 
   OutputStageEvalBufferImpl(const OutputStage& s) : output_stage(s) {}
 
diff --git a/public/output_stages.h b/public/output_stages.h
@@ -66,8 +66,9 @@ struct OutputStageQuantizeDownInt32ToUint8ScalePC {
 };
 
 // This output stage takes int32 values and returns still int32 values,
-// but "quantized down" to the uint8 scale; in other words, its output
-// is typically what one would then clamp to [0..255] and cast to uint8
+// but "quantized down" to a different scale; for example, in a pipeline
+// that outputs uint8 values in [0..255], the output of this stage would be
+// int32 values ready to be clamped to [0..255] and cast to uint8
 // (see OutputStageSaturatingCastToUint8).
 //
 // This "quantization down" process depends on 3 parameters,
@@ -111,12 +112,17 @@ struct OutputStageQuantizeDownInt32ToUint8ScalePC {
 // expansions that implicitly rely on 0-padding. If 0 were not
 // a representable value, such operations would have to pad
 // using a nonzero value, introducing bias in the computation.
-struct OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint {
+struct OutputStageQuantizeDownInt32ByFixedPoint {
   std::int32_t result_fixedpoint_multiplier;
   std::int32_t result_shift;
   std::int32_t result_offset_after_shift;
 };
 
+// OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint is the old deprecated
+// name of OutputStageQuantizeDownInt32ByFixedPoint, before we noticed that
+// there really wasn't anything Uint8-specific about it.
+using OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint = OutputStageQuantizeDownInt32ByFixedPoint;
+
 // This output stage takes int32 values that are expected to be already
 // on the final uint8 scale, but not necessarily in the [0..255] range.
 // It clamps them to the [0..255] range and returns them casted to uint8.
diff --git a/test/benchmark_all_sizes.cc b/test/benchmark_all_sizes.cc
@@ -122,10 +122,10 @@ float benchmark_8bit(int rows, int depth, int cols) {
   MakeZero(&rhs);
   MakeZero(&result);
 
-  typedef std::tuple<OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint,
+  typedef std::tuple<OutputStageQuantizeDownInt32ByFixedPoint,
                      OutputStageSaturatingCastToUint8>
       Pipeline;
-  gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint
+  gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint
       quantize_down_stage;
   quantize_down_stage.result_offset_after_shift = 128;
   quantize_down_stage.result_fixedpoint_multiplier = 1234567890;
diff --git a/test/test.cc b/test/test.cc
@@ -1428,8 +1428,8 @@ void TestOutputStages(int rows, int depth, int cols, int result_offset,
     result_fixedpoint_shift++;
   }
   Check(result_fixedpoint_shift >= 0);
-  // Now test OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint
-  OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint
+  // Now test OutputStageQuantizeDownInt32ByFixedPoint
+  OutputStageQuantizeDownInt32ByFixedPoint
       quantize_down_by_fixedpoint_stage;
   quantize_down_by_fixedpoint_stage.result_offset_after_shift =
       static_cast<std::int32_t>(