Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions include/API/Buffer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===- Buffer.h - Offload API Buffer --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

#ifndef OFFLOADTEST_API_BUFFER_H
#define OFFLOADTEST_API_BUFFER_H

#include "API/Resources.h"

namespace offloadtest {

struct BufferCreateDesc {
MemoryLocation Location;
};

class Buffer {
public:
virtual ~Buffer() = default;

Buffer(const Buffer &) = delete;
Buffer &operator=(const Buffer &) = delete;

protected:
Buffer() = default;
};

} // namespace offloadtest

#endif // OFFLOADTEST_API_BUFFER_H
38 changes: 17 additions & 21 deletions include/API/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
#include "Config.h"

#include "API/API.h"
#include "API/Buffer.h"
#include "API/Capabilities.h"
#include "API/CommandBuffer.h"
#include "API/Texture.h"
#include "Support/Pipeline.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Error.h"
Expand All @@ -37,27 +40,6 @@ struct DeviceConfig {
bool EnableValidationLayer = false;
};

enum class MemoryLocation {
GpuOnly,
CpuToGpu,
GpuToCpu,
};

struct BufferCreateDesc {
MemoryLocation Location;
};

class Buffer {
public:
virtual ~Buffer() = default;

Buffer(const Buffer &) = delete;
Buffer &operator=(const Buffer &) = delete;

protected:
Buffer() = default;
};

class Fence {
public:
virtual ~Fence() = default;
Expand Down Expand Up @@ -99,6 +81,10 @@ class Device {
virtual llvm::Expected<std::shared_ptr<Buffer>>
createBuffer(std::string Name, BufferCreateDesc &Desc,
size_t SizeInBytes) = 0;
Comment thread
EmilioLaiso marked this conversation as resolved.

virtual llvm::Expected<std::shared_ptr<Texture>>
createTexture(std::string Name, TextureCreateDesc &Desc) = 0;

virtual void printExtra(llvm::raw_ostream &OS) {}

virtual llvm::Expected<std::unique_ptr<CommandBuffer>>
Expand All @@ -122,6 +108,16 @@ initializeMetalDevices(const DeviceConfig Config,
llvm::Expected<llvm::SmallVector<std::unique_ptr<Device>>>
initializeDevices(const DeviceConfig Config);

// Creates a render target texture using the format and dimensions from a
// CPUBuffer. Does not upload the buffer's data — only uses its description to
// configure the texture.
llvm::Expected<std::shared_ptr<Texture>>
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to use std::shared_ptr? I know a lot of projects avoid using it because it is slow.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would recommend using the shared_ptr. We want to be able to use reference counting to determine if resources need to be kept alive so that we can free them once they are no longer being used on the GPU timeline.

IMO the overhead of shared_ptr is a bit overblown (an atomic increment on copy). If we ever run into a case where we need thousands of textures that need to be kept alive we can look at alternative solutions for those use cases. However, in my experience, that case is only there for bindless resources where you are going to need an alternative anyway.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to revisit this. I'm not sure how you count "thousands of textures", but, as the number of tests increases, we could easily reach that over all of the tests. I'll let others like @bogner and @llvm-beanz give their opinions.

I'll go along with what the community thinks.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@manon-traverse - can you file an issue to follow up on this please? Ideally, we'd get consensus before the PR is merged, but we as long as we figure it all out it's ok to that post-merge.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with Steven here that shared_ptr should be avoided unless absolutely necessary. Regardless of whether or not the overhead of shared_ptr will become an issue for us here, relying on reference counting obscures object lifetimes and encourages messy design patterns. It is usually possible to have clear ownership of the object, which makes the code easier to understand and reason about, and generally leads to a more robust architecture.

createRenderTargetFromCPUBuffer(Device &Dev, const CPUBuffer &Buf);

// Creates a depth/stencil texture matching the dimensions of a render target.
llvm::Expected<std::shared_ptr<Texture>>
createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height);

} // namespace offloadtest

#endif // OFFLOADTEST_API_DEVICE_H
159 changes: 159 additions & 0 deletions include/API/FormatConversion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
//===- FormatConversion.h - Refactoring helpers ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Transitional helpers for converting between the legacy DataFormat + Channels
// description system and the unified Format enum. This file should be deleted
// once the pipeline is fully migrated to use Format directly.
Comment on lines +1 to +11
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a tracking issue for this conversion and removal?

//
//===----------------------------------------------------------------------===//

#ifndef OFFLOADTEST_API_FORMATCONVERSION_H
#define OFFLOADTEST_API_FORMATCONVERSION_H

#include "API/Resources.h"
#include "API/Texture.h"
#include "Support/Pipeline.h"

#include "llvm/Support/Error.h"

namespace offloadtest {

// Bridge for code that still describes textures as DataFormat + Channels (e.g.
// render targets bound via CPUBuffer). Once the pipeline is refactored to use
// Format directly, this function can be removed.
inline llvm::Expected<Format> toFormat(DataFormat Format, int Channels) {
switch (Format) {
case DataFormat::Int16:
switch (Channels) {
case 1:
return Format::R16Sint;
case 2:
return Format::RG16Sint;
case 4:
return Format::RGBA16Sint;
}
break;
case DataFormat::UInt16:
switch (Channels) {
case 1:
return Format::R16Uint;
case 2:
return Format::RG16Uint;
case 4:
return Format::RGBA16Uint;
}
break;
case DataFormat::Int32:
switch (Channels) {
case 1:
return Format::R32Sint;
case 2:
return Format::RG32Sint;
case 4:
return Format::RGBA32Sint;
}
break;
case DataFormat::UInt32:
switch (Channels) {
case 1:
return Format::R32Uint;
case 2:
return Format::RG32Uint;
case 4:
return Format::RGBA32Uint;
}
break;
case DataFormat::Float32:
switch (Channels) {
case 1:
return Format::R32Float;
case 2:
return Format::RG32Float;
case 4:
return Format::RGBA32Float;
}
break;
case DataFormat::Depth32:
// D32FloatS8Uint is not expressible as DataFormat + Channels because the
// stencil component is uint8, not a second Depth32 channel. Once the
// pipeline uses Format directly, this limitation goes away.
if (Channels == 1)
return Format::D32Float;
break;
// No Format mapping for these DataFormats.
case DataFormat::Hex8:
case DataFormat::Hex16:
case DataFormat::Hex32:
case DataFormat::Hex64:
case DataFormat::UInt64:
case DataFormat::Int64:
case DataFormat::Float16:
case DataFormat::Float64:
case DataFormat::Bool:
return llvm::createStringError(std::errc::invalid_argument,
"DataFormat %d has no Format equivalent.",
static_cast<int>(Format));
}
return llvm::createStringError(std::errc::invalid_argument,
"No Format for DataFormat %d with %d "
"channel(s).",
static_cast<int>(Format), Channels);
}

// Validates that a TextureCreateDesc is consistent with the CPUBuffer it was
// derived from. Call this after building a TextureCreateDesc from a CPUBuffer
// to catch mismatches between the two description systems.
inline llvm::Error
validateTextureDescMatchesCPUBuffer(const TextureCreateDesc &Desc,
const CPUBuffer &Buf) {
auto ExpectedFmt = toFormat(Buf.Format, Buf.Channels);
if (!ExpectedFmt)
return ExpectedFmt.takeError();
if (Desc.Format != *ExpectedFmt)
return llvm::createStringError(
std::errc::invalid_argument,
"TextureCreateDesc format '%s' does not match CPUBuffer format "
"(DataFormat %d, %d channels -> '%s').",
getFormatName(Desc.Format).data(), static_cast<int>(Buf.Format),
Buf.Channels, getFormatName(*ExpectedFmt).data());
if (Desc.Width != static_cast<uint32_t>(Buf.OutputProps.Width))
return llvm::createStringError(
std::errc::invalid_argument,
"TextureCreateDesc width %u does not match CPUBuffer width %d.",
Desc.Width, Buf.OutputProps.Width);
if (Desc.Height != static_cast<uint32_t>(Buf.OutputProps.Height))
return llvm::createStringError(
std::errc::invalid_argument,
"TextureCreateDesc height %u does not match CPUBuffer height %d.",
Desc.Height, Buf.OutputProps.Height);
if (Desc.MipLevels != static_cast<uint32_t>(Buf.OutputProps.MipLevels))
return llvm::createStringError(
std::errc::invalid_argument,
"TextureCreateDesc mip levels %u does not match CPUBuffer mip "
"levels %d.",
Desc.MipLevels, Buf.OutputProps.MipLevels);
const uint32_t TexelSize = getFormatSizeInBytes(Desc.Format);
if (Buf.Stride > 0 && static_cast<uint32_t>(Buf.Stride) != TexelSize)
return llvm::createStringError(
std::errc::invalid_argument,
"CPUBuffer stride %d does not match texture format element size %u.",
Buf.Stride, TexelSize);
const uint64_t ExpectedSize =
static_cast<uint64_t>(Desc.Width) * Desc.Height * TexelSize;
if (static_cast<uint64_t>(Buf.size()) != ExpectedSize)
return llvm::createStringError(
std::errc::invalid_argument,
"CPUBuffer size %u does not match expected size %llu "
"(width %u * height %u * element size %u).",
Buf.size(), ExpectedSize, Desc.Width, Desc.Height, TexelSize);
return llvm::Error::success();
}

} // namespace offloadtest

#endif // OFFLOADTEST_API_FORMATCONVERSION_H
Loading
Loading