Skip to content

Commit

Permalink
[Support] Add a new type to represent half precision values
Browse files Browse the repository at this point in the history
*Description*
The new type performs all operations in single-precision floating
point, converting to and from a 16-bit storage format.
The conversions are performed by the same FP16 library used by Caffe2.

*Testing*
Added specific unittests to check the basic functionality of the new
type.

*Documentation*
Mention the additional submodule dependency for the FP16 conversions.

This is related to pytorch#1329
  • Loading branch information
Quentin Colombet authored and qcolombet committed Sep 20, 2018
1 parent 3113147 commit 7a84fbc
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "thirdparty/onnx"]
path = thirdparty/onnx
url = https://github.com/onnx/onnx.git
[submodule "thirdparty/fp16"]
path = thirdparty/fp16
url = https://github.com/Maratyszcza/FP16.git
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ if(NOT EXISTS "${GLOW_THIRDPARTY_DIR}/onnx")
message(FATAL_ERROR "No onnx git submodule. Run: git submodule update --init --recursive")
endif()

# The FP16 conversion headers are expected under ${GLOW_THIRDPARTY_DIR}/fp16.
# Expose them to the build when present; otherwise abort configuration with
# instructions for fetching the submodule.
if(EXISTS "${GLOW_THIRDPARTY_DIR}/fp16")
  include_directories(${GLOW_THIRDPARTY_DIR}/fp16/include)
else()
  message(FATAL_ERROR "No fp16 git submodule. Run: git submodule update --init --recursive")
endif()

add_subdirectory(lib)
add_subdirectory(tools)

Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ compiler that supports C++11, on CMake, LLVM, protocol buffers, and libpng.

#### Submodules

Glow depends on googletest as a submodule. To get it, from the glow directory, run:
Glow depends on a few submodules: googletest, onnx, and a library
for FP16 conversions.

To get them, from the glow directory, run:

```bash
git submodule update --init --recursive
Expand Down
62 changes: 62 additions & 0 deletions include/glow/Support/Float16.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
* Copyright (c) 2018-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GLOW_SUPPORT_FLOAT16_H
#define GLOW_SUPPORT_FLOAT16_H

#include "fp16.h"

#include <cstdint>

namespace glow {

/// Use a proxy type in case we need to change it in the future.
using Float16Storage = uint16_t;
class float16 {
Float16Storage data_;

public:
float16(float data = 0.0) { data_ = fp16_ieee_from_fp32_value(data); }

/// Arithmetic operators.
float16 operator*(const float16 &b) const {
return float16(operator float() * float(b));
}
float16 operator/(const float16 &b) const {
return float16(operator float() / float(b));
}
float16 operator+(const float16 &b) const {
return float16(operator float() + float(b));
}
float16 operator-(const float16 &b) const {
return float16(operator float() - float(b));
}

/// Comparisons.
bool operator<(const float16 &b) const { return operator float() < float(b); }
bool operator>(const float16 &b) const { return operator float() > float(b); }
bool operator==(const float16 &b) const {
return operator float() == float(b);
}

/// Cast operators.
operator double() const { return double(operator float()); }
operator float() const { return fp16_ieee_to_fp32_value(data_); }
operator long long() const { return static_cast<long long>(data_); }
}; // End class float16.

} // End namespace glow.

#endif // GLOW_SUPPORT_FLOAT16_H
9 changes: 9 additions & 0 deletions tests/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,15 @@ target_link_libraries(quantizationTest
testMain)
add_glow_test(quantizationTest ${GLOW_BINARY_DIR}/tests/quantizationTest)

# Unit test executable for the float16 support type.
add_executable(float16Test float16Test.cpp)
target_link_libraries(float16Test PRIVATE Support gtest testMain)
add_glow_test(float16Test ${GLOW_BINARY_DIR}/tests/float16Test)

add_executable(UtilsTest
UtilsTest.cpp)
target_link_libraries(UtilsTest
Expand Down
70 changes: 70 additions & 0 deletions tests/unittests/float16Test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/**
* Copyright (c) 2018-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// This file tests the basic functionality of the float16 type.
// This is by no means a test of IEEE 754 compliance!

#include "glow/Support/Float16.h"

#include "gtest/gtest.h"

using namespace glow;

/// The float16 sum must equal the single precision sum converted to float16.
TEST(Float16, add) {
  const float16 a(2.0);
  const float16 b(1.0);
  EXPECT_EQ(a + b, float16(float(a) + float(b)));
}

/// The float16 difference must equal the single precision difference
/// converted to float16.
TEST(Float16, sub) {
  const float16 a(2.0);
  const float16 b(0.5);
  EXPECT_EQ(a - b, float16(float(a) - float(b)));
}

/// The float16 product must equal the single precision product converted to
/// float16.
TEST(Float16, mul) {
  const float16 a(3.5);
  const float16 b(3.0);
  EXPECT_EQ(a * b, float16(float(a) * float(b)));
}

/// The float16 quotient must equal the single precision quotient converted to
/// float16; the divisor is negative to cover a sign change.
TEST(Float16, div) {
  const float16 a(16.5);
  const float16 b(-3.0);
  EXPECT_EQ(a / b, float16(float(a) / float(b)));
}

/// Greater-than on float16 must agree with greater-than on the converted
/// floats, and must hold for these operands.
TEST(Float16, gt) {
  const float16 a(13.25);
  const float16 b(3.56);
  EXPECT_EQ(a > b, float(a) > float(b));
  EXPECT_TRUE(a > b);
}

/// Less-than on float16 must agree with less-than on the converted floats,
/// and must not hold when the left operand is the larger one.
TEST(Float16, lt) {
  const float16 a(123.75);
  const float16 b(-12.6);
  EXPECT_EQ(a < b, float(a) < float(b));
  EXPECT_FALSE(a < b);
}

/// Equality on float16 must agree with equality on the converted floats:
/// distinct values compare unequal and a value compares equal to itself.
TEST(Float16, eq) {
  const float16 a(-483.455);
  const float16 b(453.0);
  EXPECT_EQ(a == b, float(a) == float(b));
  EXPECT_FALSE(a == b);
  EXPECT_TRUE(a == a);
}
1 change: 1 addition & 0 deletions thirdparty/fp16
Submodule fp16 added at 4b37bd

0 comments on commit 7a84fbc

Please sign in to comment.