Diff of showfont-1.0.7 with composable-kernel-7.1.0

/usr/portage/sci-libs/composable-kernel/composable-kernel-7.1.0.ebuild 2025-12-01 18:18:04.705253301 +0300
3 3

  
4 4
EAPI=8
5 5

  
6
# NOTE(review): this inherit appears to belong to the showfont-1.0.7 side of the diff — confirm.
inherit xorg-meson
6
# ROCm release this ebuild targets, taken directly from the package version.
ROCM_VERSION=${PV}
7
# Python implementations the package may be built for.
PYTHON_COMPAT=( python3_{10..14} python3_13t )
7 8

  
8
# NOTE(review): showfont description; presumably the showfont side of the diff — confirm.
DESCRIPTION="font dumper for X font server"
9
# Eclasses whose helpers are used further down (check-reqs_*, cmake_src_*,
# append-*flags, makeopts_jobs, python_*, rocm_*/get_amdgpu_flags).
inherit check-reqs cmake flag-o-matic multiprocessing python-r1 rocm
9 10

  
10
# NOTE(review): presumably the showfont keyword list; a second KEYWORDS follows later — confirm.
KEYWORDS="amd64 arm ~arm64 ~mips ppc ppc64 ~s390 ~sparc x86"
11
# Pinned googletest snapshot; referenced by SRC_URI (USE=test only) and by
# the FETCHCONTENT_SOURCE_DIR_GTEST argument in src_configure.
GTEST_COMMIT="b85864c64758dec007208e56af933fc3f52044ee"
12
# Local distfile name under which the googletest snapshot is stored.
GTEST_FILE="gtest-1.14.0_p20220421.tar.gz"
11 13

  
12
# NOTE(review): the next two assignments reference X11 packages and presumably
# belong to the showfont-1.0.7 side of the diff — confirm.
RDEPEND="x11-libs/libFS"
13
DEPEND="${RDEPEND}
14
	x11-base/xorg-proto"
14
DESCRIPTION="High Performance Composable Kernel for AMD GPUs"
15
HOMEPAGE="https://github.com/ROCm/composable_kernel"
16
# Upstream release tarball (renamed to ${P}.tar.gz), plus the pinned
# googletest snapshot, which is only fetched with USE=test.
SRC_URI="https://github.com/ROCm/composable_kernel/archive/rocm-${PV}.tar.gz -> ${P}.tar.gz
17
	test? ( https://github.com/google/googletest/archive/${GTEST_COMMIT}.tar.gz -> ${GTEST_FILE} )"
18
# Source directory; presumably the top-level directory of the upstream
# tarball — verify against the archive.
S="${WORKDIR}/composable_kernel-rocm-${PV}"
19

  
20
LICENSE="MIT"
21
# Subslot is the first two components of the package version (ver_cut 1-2).
SLOT="0/$(ver_cut 1-2)"
22
KEYWORDS="~amd64"
23

  
24
IUSE="debug profiler test"
25
# USE constraints; presumably provided by the rocm and python-r1 eclasses — confirm.
REQUIRED_USE="${ROCM_REQUIRED_USE} ${PYTHON_REQUIRED_USE}"
26
# Tests are restricted unless USE=test is enabled.
RESTRICT="!test? ( test )"
27

  
28
# Runtime dependencies: HIP from the same SLOT as this package, plus
# whatever ${PYTHON_DEPS} expands to.
RDEPEND="
29
	dev-util/hip:${SLOT}
30
	${PYTHON_DEPS}
31
"
32

  
33
# Build-time dependencies mirror the runtime set.
DEPEND="${RDEPEND}"
34

  
35
# Tools required on the build host.
BDEPEND="
36
	dev-build/rocm-cmake
37
"
38

  
39
# Local patches shipped in FILESDIR. The version embedded in each file name
# is presumably the release the patch was first written for — confirm.
PATCHES=(
40
	"${FILESDIR}"/${PN}-6.1.1-no-git-no-hash.patch
41
	"${FILESDIR}"/${PN}-6.3.0-conditional-kernels.patch
42
	"${FILESDIR}"/${PN}-7.0.1-conditional-ckprofiler.patch
43
	"${FILESDIR}"/${PN}-7.0.1-libcxx-includes.patch
44
	"${FILESDIR}"/${PN}-7.1.0-expand-isa.patch
45
)
46

  
47
# Warn about expensive build configurations and run the check-reqs memory
# check for the current phase.
#
# Shared by pkg_pretend and pkg_setup; returns immediately for binary merges.
# Globals read: MERGE_TYPE, AMDGPU_TARGETS, EBUILD_PHASE_FUNC.
# Calls:        makeopts_jobs, ewarn, check-reqs_${EBUILD_PHASE_FUNC}
#               (with CHECKREQS_MEMORY = requested jobs * per-job estimate).
ck_check-reqs() {
	[[ ${MERGE_TYPE} == binary ]] && return

	# Split the USE_EXPAND value into words without exposing it to pathname
	# expansion, and keep the array out of the global ebuild environment.
	local -a targets
	read -ra targets <<< "${AMDGPU_TARGETS}"
	if (( ${#targets[@]} > 1 )); then
		ewarn "composable-kernel will be compiled for multiple GPU architectures,"
		ewarn "which will take a significant amount of time."
		ewarn "Please consider setting AMDGPU_TARGETS USE_EXPAND variable to a single architecture."
	fi

	# It takes ~3GB of RAM per build thread. The same figure is used for the
	# advisory below and for the check-reqs limit so the two cannot disagree.
	local mb_per_job=3072
	local user_jobs available_memory_mb
	user_jobs=$(makeopts_jobs)
	# Seventh field of the "Mem:" row of `free -m` (the "available" column
	# in procps-ng output).
	available_memory_mb=$(free -m | awk '/Mem:/ {print $7}')

	# Only give advice when the probe actually produced a number; an empty
	# or garbled value would otherwise yield a misleading warning.
	if [[ ${available_memory_mb} =~ ^[0-9]+$ ]]; then
		local max_jobs=$(( available_memory_mb / mb_per_job ))
		(( max_jobs >= 1 )) || max_jobs=1
		if (( max_jobs < user_jobs )); then
			ewarn "${available_memory_mb} MB of free RAM is not enough for ${user_jobs} parallel build jobs (~3GB per job)."
			ewarn "Please consider setting MAKEOPTS=\"-j${max_jobs}\" for this package."
		fi
	fi

	# Visible to the check-reqs phase function through dynamic scoping.
	local CHECKREQS_MEMORY=$(( user_jobs * mb_per_job ))M
	check-reqs_${EBUILD_PHASE_FUNC}
}
71

  
72
# pkg_pretend phase: delegate to the shared resource check.
pkg_pretend() {
	ck_check-reqs
}
75

  
76
# pkg_setup phase: delegate to the shared resource check.
pkg_setup() {
	ck_check-reqs
}
79

  
80
# Edit the upstream CMake files in place, then hand over to the cmake
# eclass for the regular preparation step.
src_prepare() {
	# Drop every line that mentions -Werror from the warning setup.
	sed -i -e '/-Werror/d' cmake/EnableCompilerWarnings.cmake || die

	# All CMakeLists.txt deletions are applied in a single sed pass.
	local cmakelists_edits=(
		# don't build examples
		-e "/add_subdirectory(example)/d"
		# Flag -amdgpu-early-inline-all explodes memory consumption
		# https://github.com/llvm/llvm-project/issues/86332
		-e "/-amdgpu-early-inline-all/d"
		-e "/-amdgpu-function-calls/d"
	)
	sed -i "${cmakelists_edits[@]}" CMakeLists.txt || die

	cmake_src_prepare
}
92

  
93
# Configure the CMake build: choose the build type from USE=debug, assemble
# the CMake arguments from the USE flags, relax the sandbox for the
# configure-time device probe, and run the cmake eclass configure step.
src_configure() {
	rocm_use_hipcc

	# Non-debug builds additionally get -DNDEBUG in both C and C++ flags.
	if use debug; then
		CMAKE_BUILD_TYPE="Debug"
	else
		append-cflags "-DNDEBUG"
		append-cxxflags "-DNDEBUG"
		CMAKE_BUILD_TYPE="Release"
	fi

	local mycmakeargs=(
		-DCMAKE_SKIP_RPATH=ON
		-DBUILD_DEV=OFF
		-DGPU_TARGETS="$(get_amdgpu_flags)"
		-DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr"
		-DBUILD_TESTING=$(usex test ON OFF)
		-DCK_USE_PROFILER=$(usex profiler ON OFF)
		-Wno-dev
	)

	# Since 6.4.1 "fallback" DL kernels should be enabled manually...
	# The first enabled target in this list is enough to switch them on.
	local dl_target
	for dl_target in gfx1010 gfx1011 gfx1012 gfx1030 gfx1031; do
		if use "amdgpu_targets_${dl_target}"; then
			mycmakeargs+=(-DDL_KERNELS=ON)
			break
		fi
	done

	# Point FetchContent at the googletest sources unpacked from SRC_URI.
	if use test; then
		mycmakeargs+=(
			-DFETCHCONTENT_SOURCE_DIR_GTEST="${WORKDIR}/googletest-${GTEST_COMMIT}"
		)
	fi

	# rocminfo call during configuration; should not happen
	# Bug: https://github.com/ROCm/composable_kernel/issues/2994
	rocm_add_sandbox -w
	addpredict /dev/random

	cmake_src_configure
}
133

  
134
# Install the CMake build, then install the ck4inductor Python module and
# its header "package data" once per enabled Python implementation.
src_install() {
	cmake_src_install

	# Per-implementation installer, invoked through python_foreach_impl.
	# shellcheck disable=SC2329
	installation() {
		python_domodule python/ck4inductor

		# install package-data manually, as there is no PEP517 compliance
		# Remember the caller's globstar setting so it can be restored
		# instead of being switched off unconditionally.
		local had_globstar=0
		shopt -q globstar && had_globstar=1
		shopt -s globstar
		local -a package_data=(
			include/ck/**/*.hpp
			library/src/tensor_operation_instance/gpu/gemm_universal/**/*.hpp
		)
		(( had_globstar )) || shopt -u globstar

		local inst_path file location
		inst_path="${D}$(python_get_sitedir)/ck4inductor"
		for file in "${package_data[@]}"; do
			# A glob without matches stays a literal pattern: nothing to copy.
			[[ -e ${file} ]] || continue
			# Every match has a directory component, so stripping the last
			# path element yields its directory without forking dirname.
			location="${inst_path}/${file%/*}"
			mkdir -p "${location}" || die "failed to create ${location}"
			cp -- "${file}" "${location}" || die "failed to install ${file}"
		done
	}
	python_foreach_impl installation
}
158

  
159
# Run the test suite with a single job, resolving libraries from the build
# tree. check_amdgpu runs first (presumably verifying GPU access — confirm).
src_test() {
	check_amdgpu
	LD_LIBRARY_PATH="${BUILD_DIR}/lib" cmake_src_test -j1
}
Thank you!