| 1 |
|
# Copyright 1999-2024 Gentoo Authors
|
|
1 |
# Copyright 1999-2025 Gentoo Authors
|
| 2 |
2 |
# Distributed under the terms of the GNU General Public License v2
|
| 3 |
3 |
|
|
4 |
# shellcheck disable=SC2317
|
| 4 |
5 |
EAPI=8
|
| 5 |
6 |
|
| 6 |
7 |
ROCM_VERSION=${PV}
|
|
8 |
PYTHON_COMPAT=( python3_{10..13} python3_13t )
|
| 7 |
9 |
|
| 8 |
|
inherit cmake flag-o-matic rocm
|
|
10 |
inherit check-reqs cmake flag-o-matic multiprocessing python-r1 rocm
|
| 9 |
11 |
|
| 10 |
12 |
GTEST_COMMIT="b85864c64758dec007208e56af933fc3f52044ee"
|
| 11 |
13 |
GTEST_FILE="gtest-1.14.0_p20220421.tar.gz"
|
| ... | ... | |
| 20 |
22 |
SLOT="0/$(ver_cut 1-2)"
|
| 21 |
23 |
KEYWORDS="~amd64"
|
| 22 |
24 |
|
| 23 |
|
IUSE="debug test"
|
| 24 |
|
REQUIRED_USE="${ROCM_REQUIRED_USE}"
|
|
25 |
IUSE="debug profiler test"
|
|
26 |
REQUIRED_USE="${ROCM_REQUIRED_USE} ${PYTHON_REQUIRED_USE}"
|
| 25 |
27 |
RESTRICT="!test? ( test )"
|
| 26 |
28 |
|
| 27 |
29 |
RDEPEND="
|
| 28 |
30 |
dev-util/hip:${SLOT}
|
| 29 |
|
>=dev-db/sqlite-3.17
|
| 30 |
|
sci-libs/rocBLAS:${SLOT}[${ROCM_USEDEP}]
|
| 31 |
|
>=dev-libs/boost-1.72
|
| 32 |
|
dev-cpp/nlohmann_json
|
| 33 |
|
dev-cpp/frugally-deep
|
|
31 |
${PYTHON_DEPS}
|
| 34 |
32 |
"
|
| 35 |
33 |
|
| 36 |
34 |
DEPEND="${RDEPEND}"
|
| ... | ... | |
| 41 |
39 |
|
| 42 |
40 |
PATCHES=(
|
| 43 |
41 |
"${FILESDIR}"/${PN}-6.1.1-enable-examples.patch
|
| 44 |
|
"${FILESDIR}"/${PN}-6.1.1-fix-clang-17-no-offload-uniform-block.patch
|
| 45 |
42 |
"${FILESDIR}"/${PN}-6.1.1-no-git-no-hash.patch
|
| 46 |
|
"${FILESDIR}"/${PN}-6.1.1-fix-libcxx.patch
|
|
43 |
"${FILESDIR}"/${PN}-6.3.0-no-inline-all.patch
|
|
44 |
"${FILESDIR}"/${PN}-6.3.0-conditional-kernels.patch
|
|
45 |
"${FILESDIR}"/${PN}-6.3.0-conditional-ckprofiler.patch
|
|
46 |
"${FILESDIR}"/${PN}-6.3.0-expand-isa.patch
|
| 47 |
47 |
)
|
| 48 |
48 |
|
|
49 |
# Emit build-cost warnings and run the check-reqs memory check.
#
# Called from both pkg_pretend and pkg_setup; the matching check-reqs phase
# function is picked through EBUILD_PHASE_FUNC. Returns immediately for
# binary merges.
ck_check-reqs() {
	[[ ${MERGE_TYPE} == binary ]] && return

	# Deliberately unquoted so the whitespace-separated list is split into
	# one array element per target. Kept local so it does not leak into the
	# global ebuild environment.
	local targets=( ${AMDGPU_TARGETS} )
	if [[ ${#targets[@]} -gt 1 ]]; then
		ewarn "composable-kernel will be compiled for multiple GPU architectures,"
		ewarn "which will take a significant amount of time."
		ewarn "Please consider setting AMDGPU_TARGETS USE_EXPAND variable to a single architecture."
	fi

	# It takes ~2 GB of RAM per build thread
	local user_jobs=$(makeopts_jobs)

	# free(1) translates its row labels, so pin the C locale to keep the
	# "Mem:" match reliable. Column 4 is the "free" column.
	local free_memory_mb=$(LC_ALL=C free -m | awk '/Mem:/ {print $4}')

	# Only give job-count advice when the amount of free memory could
	# actually be determined; otherwise the arithmetic below would treat
	# the empty value as 0 and always print a misleading warning.
	if [[ ${free_memory_mb} =~ ^[0-9]+$ ]]; then
		local max_jobs=$(( free_memory_mb / 2048 ))
		max_jobs=$(( max_jobs < 1 ? 1 : max_jobs ))
		local limited_jobs=$(( user_jobs < max_jobs ? user_jobs : max_jobs ))
		if [[ "${max_jobs}" -lt "${user_jobs}" ]]; then
			ewarn "${free_memory_mb} MB of free RAM is not enough for ${user_jobs} parallel build jobs (~2Gb per job)."
			ewarn "Please consider setting MAKEOPTS=\"-j${limited_jobs}\" for this package."
		fi
	fi

	# Requirement handed to check-reqs: 2048 MB for every requested job.
	local CHECKREQS_MEMORY=$((user_jobs*2048))M
	check-reqs_${EBUILD_PHASE_FUNC}
}
|
|
73 |
|
|
74 |
# pkg_pretend phase: delegate to the shared requirements check.
pkg_pretend() { ck_check-reqs; }
|
|
77 |
|
|
78 |
# pkg_setup phase: delegate to the shared requirements check.
pkg_setup() { ck_check-reqs; }
|
|
81 |
|
| 49 |
82 |
src_prepare() {
|
| 50 |
83 |
sed -e '/-Werror/d' -i cmake/EnableCompilerWarnings.cmake || die
|
| 51 |
84 |
cmake_src_prepare
|
| ... | ... | |
| 68 |
101 |
-DGPU_TARGETS="$(get_amdgpu_flags)"
|
| 69 |
102 |
-DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr"
|
| 70 |
103 |
-DBUILD_TESTING=$(usex test ON OFF)
|
|
104 |
-DCK_USE_PROFILER=$(usex profiler ON OFF)
|
|
105 |
-Wno-dev
|
| 71 |
106 |
)
|
| 72 |
107 |
|
| 73 |
108 |
if use test; then
|
| 74 |
109 |
mycmakeargs+=(
|
| 75 |
|
-DGOOGLETEST_DIR="${WORKDIR}/googletest-${GTEST_COMMIT}"
|
|
110 |
-DFETCHCONTENT_SOURCE_DIR_GTEST="${WORKDIR}/googletest-${GTEST_COMMIT}"
|
| 76 |
111 |
)
|
| 77 |
112 |
fi
|
| 78 |
113 |
|
| 79 |
114 |
cmake_src_configure
|
| 80 |
115 |
}
|
| 81 |
116 |
|
|
117 |
# Install the CMake build results, then the ck4inductor Python module
# together with the header files it ships as package data.
src_install() {
	cmake_src_install

	# Worker invoked through python_foreach_impl below.
	installation() {
		# Keep the helper variables out of the global ebuild environment.
		local package_data inst_path file location

		python_domodule python/ck4inductor

		# install package-data manually, as there is no PEP517 compliance
		# globstar is enabled only while the array is built so that "**"
		# matches across nested directories.
		shopt -s globstar
		package_data=(
			include/ck/**/*.hpp
			library/src/tensor_operation_instance/gpu/gemm_universal/**/*.hpp
		)
		shopt -u globstar

		inst_path="${D}$(python_get_sitedir)/ck4inductor"
		for file in "${package_data[@]}"; do
			# Recreate the source-relative directory layout below the module.
			location="${inst_path}/$(dirname "${file}")"
			# Abort on failure: an unmatched glob stays a literal pattern
			# and would otherwise make cp fail without anyone noticing.
			mkdir -p "${location}" || die
			cp "${file}" "${location}" || die
		done
	}
	python_foreach_impl installation
}
|
|
140 |
|
| 82 |
141 |
src_test() {
|
| 83 |
142 |
check_amdgpu
|
| 84 |
143 |
LD_LIBRARY_PATH="${BUILD_DIR}"/lib cmake_src_test -j1
|