[Support] Add a new type to represent half precision values

*Description* The new type performs all operations in single-precision floating point, converting to and from a 16-bit storage format. The conversions are performed by the same FP16 library used by Caffe2. *Testing* Added dedicated unit tests to check the basic functionality of the new type. *Documentation* Mention the additional submodule dependency for the FP16 conversions. This is related to pytorch#1329
QiJune · Sep 20, 2018 · 7a84fbc · 7a84fbc
1 parent 3113147
commit 7a84fbc
Show file tree

Hide file tree

Showing 7 changed files with 155 additions and 1 deletion.
diff --git a/.gitmodules b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "thirdparty/onnx"]
 	path = thirdparty/onnx
 	url = https://github.com/onnx/onnx.git
+[submodule "thirdparty/fp16"]
+	path = thirdparty/fp16
+	url = https://github.com/Maratyszcza/FP16.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -79,6 +79,12 @@ if(NOT EXISTS "${GLOW_THIRDPARTY_DIR}/onnx")
   message(FATAL_ERROR "No onnx git submodule. Run: git submodule update --init --recursive")
 endif()
 
+# FP16 conversion headers come from a git submodule; stop early if it is missing.
+if(NOT EXISTS "${GLOW_THIRDPARTY_DIR}/fp16")
+  message(FATAL_ERROR "No fp16 git submodule. Run: git submodule update --init --recursive")
+endif()
+include_directories(${GLOW_THIRDPARTY_DIR}/fp16/include)
+
 add_subdirectory(lib)
 add_subdirectory(tools)
 

diff --git a/README.md b/README.md
@@ -43,7 +43,10 @@ compiler that supports C++11, on CMake, LLVM, protocol buffers, and libpng.
 
 #### Submodules
 
-Glow depends on googletest as a submodule. To get it, from the glow directory, run:
+Glow depends on a few submodules: googletest, onnx, and a library
+for FP16 conversions.
+
+To get them, from the glow directory, run:
 
   ```bash
   git submodule update --init --recursive

diff --git a/include/glow/Support/Float16.h b/include/glow/Support/Float16.h
@@ -0,0 +1,62 @@
+/**
+ * Copyright (c) 2018-present, Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef GLOW_SUPPORT_FLOAT16_H
+#define GLOW_SUPPORT_FLOAT16_H
+
+#include "fp16.h"
+
+#include <cstdint>
+
+namespace glow {
+
+/// Use a proxy type in case we need to change it in the future.
+using Float16Storage = uint16_t;
+/// Half-precision value: 16-bit storage, arithmetic done in single precision.
+class float16 {
+  Float16Storage data_;
+
+public:
+  /// Build from a single-precision value; defaults to zero.
+  float16(float data = 0.0) : data_(fp16_ieee_from_fp32_value(data)) {}
+
+  /// Arithmetic operators: computed in float, result converted back.
+  float16 operator*(const float16 &b) const {
+    return float16(float(*this) * float(b));
+  }
+  float16 operator/(const float16 &b) const {
+    return float16(float(*this) / float(b));
+  }
+  float16 operator+(const float16 &b) const {
+    return float16(float(*this) + float(b));
+  }
+  float16 operator-(const float16 &b) const {
+    return float16(float(*this) - float(b));
+  }
+
+  /// Comparisons, evaluated on the float values of both operands.
+  bool operator<(const float16 &b) const { return float(*this) < float(b); }
+  bool operator>(const float16 &b) const { return float(*this) > float(b); }
+  bool operator==(const float16 &b) const { return float(*this) == float(b); }
+
+  /// Cast operators; each yields the represented value, not the raw bits.
+  operator double() const { return double(operator float()); }
+  operator float() const { return fp16_ieee_to_fp32_value(data_); }
+  operator long long() const { return static_cast<long long>(float(*this)); }
+}; // End class float16.
+
+} // End namespace glow.
+
+#endif // GLOW_SUPPORT_FLOAT16_H
diff --git a/tests/unittests/CMakeLists.txt b/tests/unittests/CMakeLists.txt
@@ -147,6 +147,15 @@ target_link_libraries(quantizationTest
                         testMain)
 add_glow_test(quantizationTest ${GLOW_BINARY_DIR}/tests/quantizationTest)
 
+# Unit tests for the glow::float16 support type.
+add_executable(float16Test float16Test.cpp)
+target_link_libraries(float16Test
+                      PRIVATE
+                        Support
+                        gtest
+                        testMain)
+add_glow_test(float16Test ${GLOW_BINARY_DIR}/tests/float16Test)
+
 add_executable(UtilsTest
                UtilsTest.cpp)
 target_link_libraries(UtilsTest

diff --git a/tests/unittests/float16Test.cpp b/tests/unittests/float16Test.cpp
@@ -0,0 +1,70 @@
+/**
+ * Copyright (c) 2018-present, Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file tests the basic functionality of the float16 type.
+// This is by no means a test of IEEE 754 compliance!
+
+#include "glow/Support/Float16.h"
+
+#include "gtest/gtest.h"
+
+using namespace glow;
+
+TEST(Float16, add) {
+  // Half-precision sum must equal the float sum converted back to float16.
+  const float16 lhs = 2.0, rhs = 1.0;
+  EXPECT_EQ(lhs + rhs, float16(float(lhs) + float(rhs)));
+}
+
+TEST(Float16, sub) {
+  // Difference is checked against the float difference narrowed to float16.
+  const float16 lhs = 2.0, rhs = 0.5;
+  EXPECT_EQ(lhs - rhs, float16(float(lhs) - float(rhs)));
+}
+
+TEST(Float16, mul) {
+  // Product is checked against the float product narrowed to float16.
+  const float16 lhs = 3.5, rhs = 3.0;
+  EXPECT_EQ(lhs * rhs, float16(float(lhs) * float(rhs)));
+}
+
+TEST(Float16, div) {
+  // Quotient (negative divisor) is checked against the narrowed float result.
+  const float16 dividend = 16.5, divisor = -3.0;
+  EXPECT_EQ(dividend / divisor, float16(float(dividend) / float(divisor)));
+}
+
+TEST(Float16, gt) {
+  // operator> must agree with the float comparison and hold for 13.25 > 3.56.
+  const float16 larger = 13.25, smaller = 3.56;
+  EXPECT_EQ(larger > smaller, float(larger) > float(smaller));
+  EXPECT_TRUE(larger > smaller);
+}
+
+TEST(Float16, lt) {
+  // operator< must agree with the float comparison; 123.75 < -12.6 is false.
+  const float16 positive = 123.75, negative = -12.6;
+  EXPECT_EQ(positive < negative, float(positive) < float(negative));
+  EXPECT_FALSE(positive < negative);
+}
+
+TEST(Float16, eq) {
+  // Distinct values must compare unequal, and -483.455 must equal itself.
+  const float16 negative = -483.455, positive = 453.0;
+  EXPECT_EQ(negative == positive, float(negative) == float(positive));
+  EXPECT_FALSE(negative == positive);
+  EXPECT_TRUE(negative == negative);
+}
diff --git a/thirdparty/fp16 b/thirdparty/fp16