Сравнение composable-kernel-6.1.1 с composable-kernel-6.3.0

/usr/portage/sci-libs/composable-kernel/composable-kernel-6.3.0.ebuild 2025-07-29 16:22:17.332467742 +0300
1
# Copyright 1999-2024 Gentoo Authors
1
# Copyright 1999-2025 Gentoo Authors
2 2
# Distributed under the terms of the GNU General Public License v2
3 3

  
4
# shellcheck disable=SC2317
4 5
EAPI=8
5 6

  
6 7
ROCM_VERSION=${PV}
8
PYTHON_COMPAT=( python3_{10..13} python3_13t )
7 9

  
8
inherit cmake flag-o-matic rocm
10
inherit check-reqs cmake flag-o-matic multiprocessing python-r1 rocm
9 11

  
10 12
GTEST_COMMIT="b85864c64758dec007208e56af933fc3f52044ee"
11 13
GTEST_FILE="gtest-1.14.0_p20220421.tar.gz"
......
20 22
SLOT="0/$(ver_cut 1-2)"
21 23
KEYWORDS="~amd64"
22 24

  
23
IUSE="debug test"
24
REQUIRED_USE="${ROCM_REQUIRED_USE}"
25
IUSE="debug profiler test"
26
REQUIRED_USE="${ROCM_REQUIRED_USE} ${PYTHON_REQUIRED_USE}"
25 27
RESTRICT="!test? ( test )"
26 28

  
27 29
RDEPEND="
28 30
	dev-util/hip:${SLOT}
29
	>=dev-db/sqlite-3.17
30
	sci-libs/rocBLAS:${SLOT}[${ROCM_USEDEP}]
31
	>=dev-libs/boost-1.72
32
	dev-cpp/nlohmann_json
33
	dev-cpp/frugally-deep
31
	${PYTHON_DEPS}
34 32
"
35 33

  
36 34
DEPEND="${RDEPEND}"
......
41 39

  
42 40
PATCHES=(
43 41
	"${FILESDIR}"/${PN}-6.1.1-enable-examples.patch
44
	"${FILESDIR}"/${PN}-6.1.1-fix-clang-17-no-offload-uniform-block.patch
45 42
	"${FILESDIR}"/${PN}-6.1.1-no-git-no-hash.patch
46
	"${FILESDIR}"/${PN}-6.1.1-fix-libcxx.patch
43
	"${FILESDIR}"/${PN}-6.3.0-no-inline-all.patch
44
	"${FILESDIR}"/${PN}-6.3.0-conditional-kernels.patch
45
	"${FILESDIR}"/${PN}-6.3.0-conditional-ckprofiler.patch
46
	"${FILESDIR}"/${PN}-6.3.0-expand-isa.patch
47 47
)
48 48

  
49
ck_check-reqs() {
50
	[[ ${MERGE_TYPE} == binary ]] && return
51

  
52
	targets=($AMDGPU_TARGETS)
53
	if [[ ${#targets[@]} -gt 1 ]]; then
54
		ewarn "composable-kernel will be compiled for multiple GPU architectures,"
55
		ewarn "which will take a significant amount of time."
56
		ewarn "Please consider setting AMDGPU_TARGETS USE_EXPAND variable to a single architecture."
57
	fi
58

  
59
	# It takes ~2 GB of RAM per build thread
60
	local user_jobs=$(makeopts_jobs)
61
	local free_memory_mb=$(free -m | awk '/Mem:/ {print $4}')
62
	local max_jobs=$(( free_memory_mb / 2048 ))
63
	max_jobs=$(( max_jobs < 1 ? 1 : max_jobs ))
64
	local limited_jobs=$(( user_jobs < max_jobs ? user_jobs : max_jobs ))
65
	if [[ "${max_jobs}" -lt "${user_jobs}" ]]; then
66
		ewarn "${free_memory_mb} MB of free RAM is not enough for ${user_jobs} parallel build jobs (~2Gb per job)."
67
		ewarn "Please consider setting MAKEOPTS=\"-j${limited_jobs}\" for this package."
68
	fi
69

  
70
	local CHECKREQS_MEMORY=$((user_jobs*2048))M
71
	check-reqs_${EBUILD_PHASE_FUNC}
72
}
73

  
74
pkg_pretend() {
75
	ck_check-reqs
76
}
77

  
78
pkg_setup() {
79
	ck_check-reqs
80
}
81

  
49 82
src_prepare() {
50 83
	sed -e '/-Werror/d' -i cmake/EnableCompilerWarnings.cmake || die
51 84
	cmake_src_prepare
......
68 101
		-DGPU_TARGETS="$(get_amdgpu_flags)"
69 102
		-DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr"
70 103
		-DBUILD_TESTING=$(usex test ON OFF)
104
		-DCK_USE_PROFILER=$(usex profiler ON OFF)
105
		-Wno-dev
71 106
	)
72 107

  
73 108
	if use test; then
74 109
		mycmakeargs+=(
75
			-DGOOGLETEST_DIR="${WORKDIR}/googletest-${GTEST_COMMIT}"
110
			-DFETCHCONTENT_SOURCE_DIR_GTEST="${WORKDIR}/googletest-${GTEST_COMMIT}"
76 111
		)
77 112
	fi
78 113

  
79 114
	cmake_src_configure
80 115
}
81 116

  
117
src_install() {
118
	cmake_src_install
119

  
120
	installation() {
121
		python_domodule python/ck4inductor
122

  
123
		# install package-data manually, as there is no PEP517 compliance
124
		shopt -s globstar
125
		package_data=(
126
			include/ck/**/*.hpp
127
			library/src/tensor_operation_instance/gpu/gemm_universal/**/*.hpp
128
		)
129
		shopt -u globstar
130

  
131
		inst_path="${D}$(python_get_sitedir)/ck4inductor"
132
		for file in "${package_data[@]}"; do
133
			location="${inst_path}/$(dirname "$file")"
134
			mkdir -p "${location}"
135
			cp "${file}" "${location}"
136
		done
137
	}
138
	python_foreach_impl installation
139
}
140

  
82 141
src_test() {
83 142
	check_amdgpu
84 143
	LD_LIBRARY_PATH="${BUILD_DIR}"/lib cmake_src_test -j1
Спасибо!