aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- dev-util/Tensile/Tensile-4.0.0-r1.ebuild 62
-rw-r--r-- dev-util/Tensile/Tensile-4.0.0.ebuild 37
-rw-r--r-- dev-util/Tensile/files/Tensile-4.0.0-cmake.patch 38
-rw-r--r-- dev-util/Tensile/files/Tensile-4.0.0-locate-commands.patch 62
-rw-r--r-- dev-util/Tensile/files/Tensile-4.0.0-output-currentISA.patch 22
-rw-r--r-- dev-util/Tensile/files/Tensile-4.0.0-setup.py-cmake.patch 13
6 files changed, 231 insertions, 3 deletions
diff --git a/dev-util/Tensile/Tensile-4.0.0-r1.ebuild b/dev-util/Tensile/Tensile-4.0.0-r1.ebuild
new file mode 100644
index 000000000..eda97d225
--- /dev/null
+++ b/dev-util/Tensile/Tensile-4.0.0-r1.ebuild
@@ -0,0 +1,62 @@
+# Copyright 1999-2021 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=7
+
+# Both variables below must be set before the inherit line so the eclass sees them.
+PYTHON_COMPAT=( python3_{7,8,9} )
+# setup.py registers console_scripts entry points, so setuptools is requested
+# as a runtime dependency as well (rdepend).
+DISTUTILS_USE_SETUPTOOLS=rdepend
+
+inherit distutils-r1
+
+DESCRIPTION="Stretching GPU performance for GEMMs and tensor contractions"
+HOMEPAGE="https://github.com/ROCmSoftwarePlatform/Tensile"
+# The upstream tag archive is renamed on download to a package-specific file name.
+SRC_URI="https://github.com/ROCmSoftwarePlatform/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz"
+
+LICENSE="MIT"
+KEYWORDS="~amd64"
+SLOT="0"
+IUSE=""
+
+RDEPEND="${PYTHON_DEPS}
+ dev-python/pyyaml[${PYTHON_USEDEP}]
+ dev-python/msgpack[${PYTHON_USEDEP}]"
+# NOTE(review): dev-util/hip is listed for the build only; presumably the
+# installed tools also invoke HIP/clang at runtime -- confirm whether it
+# belongs in RDEPEND too.
+DEPEND="${RDEPEND}
+ dev-util/hip"
+
+PATCHES=( "${FILESDIR}"/${PN}-4.0.0-cmake.patch
+ "${FILESDIR}"/${PN}-4.0.0-setup.py-cmake.patch
+ "${FILESDIR}"/${PN}-4.0.0-locate-commands.patch
+ "${FILESDIR}"/${PN}-4.0.0-output-currentISA.patch )
+
+S="${WORKDIR}/${PN}-rocm-${PVR}"
+CMAKE_USE_DIR="${WORKDIR}/Source"
+
+# Apply PATCHES, relocate the bundled CMake trees out of the Python package and
+# rewrite hard-coded paths/defaults so the installed files are found under
+# ${EPREFIX}/usr/share/${PN}.
+src_prepare() {
+ local relocated_src="${WORKDIR}/Source"
+
+ # Applies PATCHES; must run before the trees are moved because the
+ # patches address files by their upstream location.
+ distutils-r1_src_prepare
+
+ # Source/ is installed to /usr/share by src_install; cmake/ is parked in
+ # ${T} until src_install puts it into the libdir cmake directory.
+ mv ${PN}/Source "${WORKDIR}"/ || die
+ mv ${PN}/cmake "${T}"/ || die
+
+ # Use the ABI-specific libdir on ROCM_SMI_ROOT lines.
+ sed -i -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \
+ "${relocated_src}"/cmake/FindROCmSMI.cmake || die
+ # Flip ON to OFF on TENSILE_USE_LLVM lines.
+ sed -i -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \
+ "${relocated_src}"/CMakeLists.txt || die
+
+ # Replace the 0,0,0 on HipClangVersion lines with the package version
+ # written with commas, and point SourcePath at the installed share directory.
+ sed -i \
+ -e "/HipClangVersion/s/0,0,0/$(ver_rs 1-3 ,)/" \
+ -e "/SourcePath/s,os\.path\.join.*$,\"${EPREFIX}/usr/share/${PN}\"," \
+ ${PN}/Common.py || die
+
+ # Substitute the os.path.dirname(...) expression with the fixed install
+ # path. The replacement also appends end='' -- presumably this sits in a
+ # print() call whose output CMake reads as Tensile_ROOT; confirm upstream.
+ sed -i -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/Tensile\", end='')|" \
+ ${PN}/__init__.py || die
+}
+
+# Install the Python package, then the CMake config files and the relocated
+# Source tree that src_prepare moved out of the package directory.
+src_install() {
+ local share_dir=/usr/share/${PN}
+
+ distutils-r1_src_install
+
+ # CMake config files parked in ${T} by src_prepare.
+ insinto "/usr/$(get_libdir)/cmake/${PN}"
+ doins "${T}"/cmake/*.cmake
+
+ # Contents of Source/ go directly into the share directory; the
+ # Source -> . symlink keeps <share dir>/Source/... paths resolvable.
+ insinto "${share_dir}"
+ doins -r "${WORKDIR}"/Source/*
+ dosym . "${share_dir}"/Source
+}
diff --git a/dev-util/Tensile/Tensile-4.0.0.ebuild b/dev-util/Tensile/Tensile-4.0.0.ebuild
index b930c3d69..eda97d225 100644
--- a/dev-util/Tensile/Tensile-4.0.0.ebuild
+++ b/dev-util/Tensile/Tensile-4.0.0.ebuild
@@ -4,6 +4,7 @@
EAPI=7
PYTHON_COMPAT=( python3_{7,8,9} )
+DISTUTILS_USE_SETUPTOOLS=rdepend
inherit distutils-r1
@@ -19,13 +20,43 @@ IUSE=""
RDEPEND="${PYTHON_DEPS}
dev-python/pyyaml[${PYTHON_USEDEP}]
dev-python/msgpack[${PYTHON_USEDEP}]"
-DEPEND="${RDEPEND}"
+DEPEND="${RDEPEND}
+ dev-util/hip"
+
+PATCHES=( "${FILESDIR}"/${PN}-4.0.0-cmake.patch
+ "${FILESDIR}"/${PN}-4.0.0-setup.py-cmake.patch
+ "${FILESDIR}"/${PN}-4.0.0-locate-commands.patch
+ "${FILESDIR}"/${PN}-4.0.0-output-currentISA.patch )
S="${WORKDIR}/${PN}-rocm-${PVR}"
+CMAKE_USE_DIR="${WORKDIR}/Source"
+
+# Apply PATCHES, relocate the bundled CMake trees out of the Python package and
+# rewrite hard-coded paths/defaults so the installed files are found under
+# ${EPREFIX}/usr/share/${PN}.
+src_prepare() {
+ local relocated_src="${WORKDIR}/Source"
+
+ # Applies PATCHES; must run before the trees are moved because the
+ # patches address files by their upstream location.
+ distutils-r1_src_prepare
+
+ # Source/ is installed to /usr/share by src_install; cmake/ is parked in
+ # ${T} until src_install puts it into the libdir cmake directory.
+ mv ${PN}/Source "${WORKDIR}"/ || die
+ mv ${PN}/cmake "${T}"/ || die
+
+ # Use the ABI-specific libdir on ROCM_SMI_ROOT lines.
+ sed -i -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \
+ "${relocated_src}"/cmake/FindROCmSMI.cmake || die
+ # Flip ON to OFF on TENSILE_USE_LLVM lines.
+ sed -i -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \
+ "${relocated_src}"/CMakeLists.txt || die
+
+ # Replace the 0,0,0 on HipClangVersion lines with the package version
+ # written with commas, and point SourcePath at the installed share directory.
+ sed -i \
+ -e "/HipClangVersion/s/0,0,0/$(ver_rs 1-3 ,)/" \
+ -e "/SourcePath/s,os\.path\.join.*$,\"${EPREFIX}/usr/share/${PN}\"," \
+ ${PN}/Common.py || die
+
+ # Substitute the os.path.dirname(...) expression with the fixed install
+ # path. The replacement also appends end='' -- presumably this sits in a
+ # print() call whose output CMake reads as Tensile_ROOT; confirm upstream.
+ sed -i -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/Tensile\", end='')|" \
+ ${PN}/__init__.py || die
+}
src_install() {
distutils-r1_src_install
- dodir "/usr/$(get_libdir)/cmake"
- mv "${ED}/usr/cmake" "${ED}/usr/$(get_libdir)/cmake/${PN}" || die
+ insinto /usr/$(get_libdir)/cmake/${PN}
+ doins "${T}"/cmake/*.cmake
+
+ insinto /usr/share/${PN}
+ doins -r "${WORKDIR}"/Source/*
+ dosym . /usr/share/${PN}/Source
}
diff --git a/dev-util/Tensile/files/Tensile-4.0.0-cmake.patch b/dev-util/Tensile/files/Tensile-4.0.0-cmake.patch
new file mode 100644
index 000000000..0c39f747c
--- /dev/null
+++ b/dev-util/Tensile/files/Tensile-4.0.0-cmake.patch
@@ -0,0 +1,38 @@
+Index: Tensile-rocm-4.0.0/Tensile/cmake/TensileConfig.cmake
+===================================================================
+--- Tensile-rocm-4.0.0.orig/Tensile/cmake/TensileConfig.cmake
++++ Tensile-rocm-4.0.0/Tensile/cmake/TensileConfig.cmake
+@@ -26,7 +26,7 @@ if(NOT DEFINED Tensile_ROOT)
+ get_filename_component(Tensile_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
+ get_filename_component(Tensile_PREFIX "${Tensile_PREFIX}" PATH)
+
+-execute_process(COMMAND "${Tensile_PREFIX}/bin/TensileGetPath" OUTPUT_VARIABLE Tensile_ROOT)
++execute_process(COMMAND "TensileGetPath" OUTPUT_VARIABLE Tensile_ROOT)
+ endif()
+ list(APPEND CMAKE_MODULE_PATH "${Tensile_ROOT}/Source/cmake/")
+ list(APPEND CMAKE_MODULE_PATH "${Tensile_ROOT}/Source/")
+@@ -93,7 +93,7 @@ function(TensileCreateLibraryFiles
+ set(Tensile_MERGE_FILES OFF)
+ endif()
+
+- set(Script "${Tensile_ROOT}/bin/TensileCreateLibrary")
++ set(Script "TensileCreateLibrary")
+ message(STATUS "Tensile script: ${Script}")
+
+ set(Options "--new-client-only" "--no-legacy-components")
+Index: Tensile-rocm-4.0.0/Tensile/Source/TensileCreateLibrary.cmake
+===================================================================
+--- Tensile-rocm-4.0.0.orig/Tensile/Source/TensileCreateLibrary.cmake
++++ Tensile-rocm-4.0.0/Tensile/Source/TensileCreateLibrary.cmake
+@@ -52,10 +52,7 @@ function(TensileCreateLibraryCmake
+ message(STATUS "Tensile_ARCHITECTURE from TensileCreateLibraryCmake : ${Tensile_ARCHITECTURE}")
+ message(STATUS "Tensile_LIBRARY_FORMAT from TensileCreateLibraryCmake : ${Tensile_LIBRARY_FORMAT}")
+
+- execute_process(COMMAND chmod 755 ${Tensile_ROOT}/bin/TensileCreateLibrary)
+- execute_process(COMMAND chmod 755 ${Tensile_ROOT}/bin/Tensile)
+-
+- set(Tensile_CREATE_COMMAND "${Tensile_ROOT}/bin/TensileCreateLibrary")
++ set(Tensile_CREATE_COMMAND "TensileCreateLibrary")
+
+ set(Tensile_SOURCE_PATH "${PROJECT_BINARY_DIR}/Tensile")
+ message(STATUS "Tensile_SOURCE_PATH=${Tensile_SOURCE_PATH}")
diff --git a/dev-util/Tensile/files/Tensile-4.0.0-locate-commands.patch b/dev-util/Tensile/files/Tensile-4.0.0-locate-commands.patch
new file mode 100644
index 000000000..78d467349
--- /dev/null
+++ b/dev-util/Tensile/files/Tensile-4.0.0-locate-commands.patch
@@ -0,0 +1,62 @@
+Index: Tensile-rocm-4.0.0/Tensile/Common.py
+===================================================================
+--- Tensile-rocm-4.0.0.orig/Tensile/Common.py
++++ Tensile-rocm-4.0.0/Tensile/Common.py
+@@ -155,7 +155,7 @@ globalParameters["PrintTensorRef"] = 0
+ globalParameters["PrintIndexAssignments"] = 0 # Print the tensor index assignment info
+ globalParameters["PrintTensorRef"] = 0 # Print reference tensor. 0x1=after init; 0x2=after copy-back; 0x3=both
+ globalParameters["PrintWinnersOnly"] = False # Only print the solutions which become the fastest
+-globalParameters["PrintCodeCommands"] = False # print the commands used to generate the code objects (asm,link,hip-clang, etc)
++globalParameters["PrintCodeCommands"] = True # print the commands used to generate the code objects (asm,link,hip-clang, etc)
+
+ # TODO - remove this when NewClient is mainstream
+ globalParameters["OldClientSourceTmp"] = True # Use an intermediate sourceTmp dir to detect file changes and minimize rebuilds on old client
+@@ -1521,14 +1521,14 @@ def assignGlobalParameters( config ):
+ print2(" %24s: %8s (unspecified)" % (key, defaultValue))
+
+ # ROCm Agent Enumerator Path
+- globalParameters["ROCmAgentEnumeratorPath"] = locateExe("/opt/rocm/bin", "rocm_agent_enumerator")
++ globalParameters["ROCmAgentEnumeratorPath"] = locateExe("", "rocm_agent_enumerator")
+ if "CxxCompiler" in config:
+ globalParameters["CxxCompiler"] = config["CxxCompiler"]
+
+ if "TENSILE_ROCM_ASSEMBLER_PATH" in os.environ:
+ globalParameters["AssemblerPath"] = os.environ.get("TENSILE_ROCM_ASSEMBLER_PATH")
+ elif globalParameters["AssemblerPath"] is None and globalParameters["CxxCompiler"] == "hipcc":
+- globalParameters["AssemblerPath"] = locateExe("/opt/rocm/llvm/bin", "clang++")
++ globalParameters["AssemblerPath"] = locateExe("", "clang++")
+ elif globalParameters["AssemblerPath"] is None and globalParameters["CxxCompiler"] == "hcc":
+ globalParameters["AssemblerPath"] = locateExe("/opt/rocm/bin", "hcc")
+
+@@ -1536,8 +1536,8 @@ def assignGlobalParameters( config ):
+ if globalParameters["CxxCompiler"] == "hcc":
+ globalParameters["ExtractKernelPath"] = locateExe("/opt/rocm/bin", "extractkernel")
+ else:
+- globalParameters["ExtractKernelPath"] = locateExe("/opt/rocm/hip/bin", "extractkernel")
+- globalParameters["ClangOffloadBundlerPath"] = locateExe("/opt/rocm/llvm/bin", "clang-offload-bundler")
++ globalParameters["ExtractKernelPath"] = locateExe("", "extractkernel")
++ globalParameters["ClangOffloadBundlerPath"] = locateExe("", "clang-offload-bundler")
+
+ if "ROCmAgentEnumeratorPath" in config:
+ globalParameters["ROCmAgentEnumeratorPath"] = config["ROCmAgentEnumeratorPath"]
+@@ -1579,20 +1579,6 @@ def assignGlobalParameters( config ):
+ # Due to platform.linux_distribution() being deprecated, just try to run dpkg regardless.
+ # The alternative would be to install the `distro` package.
+ # See https://docs.python.org/3.7/library/platform.html#platform.linux_distribution
+- try:
+- if globalParameters["CxxCompiler"] == "hipcc":
+- output = subprocess.run(["dpkg", "-l", "hip-rocclr"], check=True, stdout=subprocess.PIPE).stdout.decode()
+- elif globalParameters["CxxCompiler"] == "hcc":
+- output = subprocess.run(["dpkg", "-l", "hcc"], check=True, stdout=subprocess.PIPE).stdout.decode()
+-
+- for line in output.split('\n'):
+- if 'hipcc' in line:
+- globalParameters['HipClangVersion'] = line.split()[2]
+- elif 'hcc' in line:
+- globalParameters['HccVersion'] = line.split()[2]
+-
+- except (subprocess.CalledProcessError, OSError) as e:
+- printWarning("Error: {} looking for package {}: {}".format('dpkg', 'hip-rocclr', e))
+
+ for key in config:
+ value = config[key]
diff --git a/dev-util/Tensile/files/Tensile-4.0.0-output-currentISA.patch b/dev-util/Tensile/files/Tensile-4.0.0-output-currentISA.patch
new file mode 100644
index 000000000..a00f9fdca
--- /dev/null
+++ b/dev-util/Tensile/files/Tensile-4.0.0-output-currentISA.patch
@@ -0,0 +1,22 @@
+Index: Tensile-rocm-4.0.0/Tensile/TensileCreateLibrary.py
+===================================================================
+--- Tensile-rocm-4.0.0.orig/Tensile/TensileCreateLibrary.py
++++ Tensile-rocm-4.0.0/Tensile/TensileCreateLibrary.py
+@@ -132,7 +132,7 @@ def buildSourceCodeObjectFile(CxxCompile
+ return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
+ globalParameters["AsmCaps"][arch]["SupportedSource"]
+
+- archs = ['gfx'+''.join(map(str,arch)) for arch in globalParameters['SupportedISA'] \
++ archs = ['gfx'+''.join(map(str,arch)) for arch in [globalParameters['CurrentISA']] \
+ if isSupported(arch)]
+
+ archFlags = ['--amdgpu-target=' + arch for arch in archs]
+@@ -1004,7 +1004,7 @@ def buildObjectFileNames(solutionWriter,
+ kernelHelperOjbNmaes = [ko.getKernelName() for ko in kernelHelperOjbs]
+
+ # Source based kernels are built for all supported architectures
+- sourceArchs = ['gfx'+''.join(map(str,arch)) for arch in globalParameters['SupportedISA'] \
++ sourceArchs = ['gfx'+''.join(map(str,arch)) for arch in [globalParameters['CurrentISA']] \
+ if isSupported(arch)]
+
+ # Asm based kernels target the configured ISA
diff --git a/dev-util/Tensile/files/Tensile-4.0.0-setup.py-cmake.patch b/dev-util/Tensile/files/Tensile-4.0.0-setup.py-cmake.patch
new file mode 100644
index 000000000..e020842b2
--- /dev/null
+++ b/dev-util/Tensile/files/Tensile-4.0.0-setup.py-cmake.patch
@@ -0,0 +1,13 @@
+Index: Tensile-rocm-4.0.0/setup.py
+===================================================================
+--- Tensile-rocm-4.0.0.orig/setup.py
++++ Tensile-rocm-4.0.0/setup.py
+@@ -30,8 +30,6 @@ setup(
+ install_requires=readRequirementsFromTxt(),
+ python_requires='>=3.5',
+ packages=["Tensile"],
+- package_data={ "Tensile": ["Tensile/cmake/*"] },
+- data_files=[ ("cmake", ["Tensile/cmake/TensileConfig.cmake", "Tensile/cmake/TensileConfigVersion.cmake"]) ],
+ include_package_data=True,
+ entry_points={"console_scripts": [
+ # user runs a benchmark