Add C10_EMBEDDED to gate ostream usage in Half/BFloat16 (#140566)

We want to use Half/BFloat16 in ExecuTorch to support shared kernel code. They will need to be used in ExecuTorch core, so they can't include `<ostream>` or define stream operators there. This diff introduces a macro, `C10_EMBEDDED`, that gates the stream code off when it is defined.

Differential Revision: [D65888035](https://our.internmc.facebook.com/intern/diff/D65888035/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/140566
Approved by: https://github.com/ezyang, https://github.com/malfet
ghstack dependencies: #140564, #140565
This commit is contained in:
Scott Wolchok 2024-11-14 10:33:58 -08:00 committed by PyTorch MergeBot
parent 0f1a88cfba
commit f59ec98ceb
2 changed files with 8 additions and 0 deletions

View file

@ -8,7 +8,9 @@
#include <cstdint>
#include <cstring>
#include <iosfwd>
#ifndef C10_EMBEDDED
#include <ostream>
#endif // C10_EMBEDDED
#if defined(__CUDACC__) && !defined(USE_ROCM)
#include <cuda_bf16.h>
@ -114,12 +116,14 @@ struct alignas(2) BFloat16 {
#endif
};
#ifndef C10_EMBEDDED
/// Stream insertion for BFloat16.
///
/// Converts `value` to `float` and inserts that into `out`, then returns
/// `out` so insertions can be chained. The whole overload is compiled out
/// when C10_EMBEDDED is defined.
C10_API inline std::ostream& operator<<(
    std::ostream& out,
    const BFloat16& value) {
  return out << static_cast<float>(value);
}
#endif // C10_EMBEDDED
} // namespace c10

View file

@ -29,7 +29,9 @@
#include <cstring>
#include <iosfwd>
#include <limits>
#ifndef C10_EMBEDDED
#include <ostream>
#endif // C10_EMBEDDED
#ifdef __CUDACC__
#include <cuda_fp16.h>
@ -384,10 +386,12 @@ struct alignas(2) Half {
#endif
};
#ifndef C10_EMBEDDED
/// Stream insertion for Half.
///
/// Converts `value` to `float` and inserts that into `out`, then returns
/// `out` so insertions can be chained. The whole overload is compiled out
/// when C10_EMBEDDED is defined.
C10_API inline std::ostream& operator<<(std::ostream& out, const Half& value) {
  return out << static_cast<float>(value);
}
#endif // C10_EMBEDDED
} // namespace c10