forked from pytorch/glow
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Support] Add a new type to represent half precision values
*Description* The new type performs all operations in single-precision floating point, converting to and from a 16-bit storage representation. The conversions are performed by the same FP16 library used by Caffe2. *Testing* Added specific unit tests to check the basic functionality of the new type. *Documentation* Mention the additional submodule dependency for the FP16 conversions. This is related to pytorch#1329
- Loading branch information
Showing
7 changed files
with
155 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/** | ||
* Copyright (c) 2018-present, Facebook, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#ifndef GLOW_SUPPORT_FLOAT16_H | ||
#define GLOW_SUPPORT_FLOAT16_H | ||
|
||
#include "fp16.h" | ||
|
||
#include <cstdint> | ||
|
||
namespace glow { | ||
|
||
/// Use a proxy type in case we need to change it in the future. | ||
using Float16Storage = uint16_t; | ||
class float16 { | ||
Float16Storage data_; | ||
|
||
public: | ||
float16(float data = 0.0) { data_ = fp16_ieee_from_fp32_value(data); } | ||
|
||
/// Arithmetic operators. | ||
float16 operator*(const float16 &b) const { | ||
return float16(operator float() * float(b)); | ||
} | ||
float16 operator/(const float16 &b) const { | ||
return float16(operator float() / float(b)); | ||
} | ||
float16 operator+(const float16 &b) const { | ||
return float16(operator float() + float(b)); | ||
} | ||
float16 operator-(const float16 &b) const { | ||
return float16(operator float() - float(b)); | ||
} | ||
|
||
/// Comparisons. | ||
bool operator<(const float16 &b) const { return operator float() < float(b); } | ||
bool operator>(const float16 &b) const { return operator float() > float(b); } | ||
bool operator==(const float16 &b) const { | ||
return operator float() == float(b); | ||
} | ||
|
||
/// Cast operators. | ||
operator double() const { return double(operator float()); } | ||
operator float() const { return fp16_ieee_to_fp32_value(data_); } | ||
operator long long() const { return static_cast<long long>(data_); } | ||
}; // End class float16. | ||
|
||
} // End namespace glow. | ||
|
||
#endif // GLOW_SUPPORT_FLOAT16_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/** | ||
* Copyright (c) 2018-present, Facebook, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
// This file tests the basic functionality of the float16 type. | ||
// This is by no means a test of IEEE 754 compliance! | ||
|
||
#include "glow/Support/Float16.h" | ||
|
||
#include "gtest/gtest.h" | ||
|
||
using namespace glow; | ||
|
||
// Adding two float16 values must give the same result as adding their float
// equivalents and converting the sum back to float16.
TEST(Float16, add) {
  const float16 lhs(2.0);
  const float16 rhs(1.0);
  const float16 expected =
      float16(static_cast<float>(lhs) + static_cast<float>(rhs));
  EXPECT_EQ(lhs + rhs, expected);
}
|
||
// Subtracting two float16 values must give the same result as subtracting
// their float equivalents and converting the difference back to float16.
TEST(Float16, sub) {
  const float16 lhs(2.0);
  const float16 rhs(0.5);
  const float16 expected =
      float16(static_cast<float>(lhs) - static_cast<float>(rhs));
  EXPECT_EQ(lhs - rhs, expected);
}
|
||
// Multiplying two float16 values must give the same result as multiplying
// their float equivalents and converting the product back to float16.
TEST(Float16, mul) {
  const float16 lhs(3.5);
  const float16 rhs(3.0);
  const float16 expected =
      float16(static_cast<float>(lhs) * static_cast<float>(rhs));
  EXPECT_EQ(lhs * rhs, expected);
}
|
||
// Dividing two float16 values (with a negative divisor) must give the same
// result as dividing their float equivalents and converting the quotient back
// to float16.
TEST(Float16, div) {
  const float16 lhs(16.5);
  const float16 rhs(-3.0);
  const float16 expected =
      float16(static_cast<float>(lhs) / static_cast<float>(rhs));
  EXPECT_EQ(lhs / rhs, expected);
}
|
||
// operator> on float16 must agree with operator> on the float equivalents,
// and must hold for a larger left-hand side.
TEST(Float16, gt) {
  const float16 lhs(13.25);
  const float16 rhs(3.56);
  const bool floatResult = static_cast<float>(lhs) > static_cast<float>(rhs);
  EXPECT_EQ(lhs > rhs, floatResult);
  EXPECT_TRUE(lhs > rhs);
}
|
||
// operator< on float16 must agree with operator< on the float equivalents,
// and must not hold when the left-hand side is the larger value.
TEST(Float16, lt) {
  const float16 lhs(123.75);
  const float16 rhs(-12.6);
  const bool floatResult = static_cast<float>(lhs) < static_cast<float>(rhs);
  EXPECT_EQ(lhs < rhs, floatResult);
  EXPECT_FALSE(lhs < rhs);
}
|
||
// operator== on float16 must agree with operator== on the float equivalents:
// distinct values compare unequal and a value compares equal to itself.
TEST(Float16, eq) {
  const float16 lhs(-483.455);
  const float16 rhs(453.0);
  const bool floatResult = static_cast<float>(lhs) == static_cast<float>(rhs);
  EXPECT_EQ(lhs == rhs, floatResult);
  EXPECT_FALSE(lhs == rhs);
  EXPECT_TRUE(lhs == lhs);
}