I didn’t fine-tune the recipe, my bad; I guess I was in a hurry to try it out. I will do so in a bit.
Other than that, one -Werror was missed somewhere:
/Share/wip/onnxruntime-1.17.3/build/_deps/neural_speed-src/bestla/bestla_parallel.h: In instantiation of 'class bestla::parallel::gemm::SchedulerBase<bestla::gemm::ICoreRowNAvxvnniKBlock<24, 2> >':
/Share/wip/onnxruntime-1.17.3/build/_deps/neural_speed-src/bestla/bestla_parallel.h:476:7: required from 'class bestla::parallel::gemm::SchedulerKBlockS<bestla::gemm::ICoreRowNAvxvnniKBlock<24, 2> >'
/Share/wip/onnxruntime-1.17.3/build/_deps/neural_speed-src/bestla/bestla_parallel.h:657:14: required from 'void bestla::parallel::GemmRun(Launch_T&, const typename Launch_T::Param&, IThreading*) [with Parallel_T = gemm::SchedulerKBlockS<bestla::gemm::ICoreRowNAvxvnniKBlock<24, 2> >; Launch_T = bestla::wrapper::gemm::LauncherIntKBlock<BTLA_ISA::AVX_VNNI, bestla::gemm::ICoreRowNAvxvnniKBlock<24, 2>, bestla::prologue_a::gemm::ActivationF32KBlockQuantize, bestla::prologue_b::gemm::WeightKBlockNInteger, bestla::epilogue::gemm::AccumulatorWriteBackFp32>; typename Launch_T::Param = bestla::wrapper::gemm::LauncherIntKBlock<BTLA_ISA::AVX_VNNI, bestla::gemm::ICoreRowNAvxvnniKBlock<24, 2>, bestla::prologue_a::gemm::ActivationF32KBlockQuantize, bestla::prologue_b::gemm::WeightKBlockNInteger, bestla::epilogue::gemm::AccumulatorWriteBackFp32>::Param]'
/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.cc:98:30: required from 'void bestla::NSSQ4GemmCompInt8(size_t, size_t, size_t, const float*, size_t, storage::gemm::StorageWeightKBlockNInteger*, float*, size_t, int8_t*, parallel::IThreading*) [with GemmCore_T = gemm::ICoreRowNAvxvnniKBlock<24, 2>; size_t = long unsigned int; int8_t = signed char]'
/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.cc:183:64: required from here
/Share/wip/onnxruntime-1.17.3/build/_deps/neural_speed-src/bestla/bestla_parallel.h:66:16: error: 'virtual void bestla::parallel::Scheduler2D::update(const bestla::parallel::Config2D&)' was hidden [-Werror=overloaded-virtual=]
66 | virtual void update(const Config2D& config) {
| ^~~~~~
/Share/wip/onnxruntime-1.17.3/build/_deps/neural_speed-src/bestla/bestla_parallel.h:151:16: note: by 'void bestla::parallel::gemm::SchedulerBase<_GemmCore_T>::update(const bestla::parallel::gemm::Config&) [with _GemmCore_T = bestla::gemm::ICoreRowNAvxvnniKBlock<24, 2>]'
151 | virtual void update(const Config& config) {
| ^~~~~~
/Share/wip/onnxruntime-1.17.3/build/_deps/neural_speed-src/bestla/bestla_parallel.h:49:16: error: 'virtual void bestla::parallel::Scheduler2D::getIndex(ThreadProblem&) const' was hidden [-Werror=overloaded-virtual=]
49 | virtual void getIndex(ThreadProblem& problem) const {
| ^~~~~~~~
/Share/wip/onnxruntime-1.17.3/build/_deps/neural_speed-src/bestla/bestla_parallel.h:142:16: note: by 'void bestla::parallel::gemm::SchedulerBase<_GemmCore_T>::getIndex(ThreadProblem&) [with _GemmCore_T = bestla::gemm::ICoreRowNAvxvnniKBlock<24, 2>; ThreadProblem = bestla::parallel::gemm::ThreadProblemBase]'
142 | virtual void getIndex(ThreadProblem& problem) {
| ^~~~~~~~
[ 95%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/qlinear_activations.cc.o
[ 95%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/qlinear_binary_op.cc.o
[ 95%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/qlinear_concat.cc.o
[ 95%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/qlinear_global_average_pool.cc.o
[ 95%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/qlinear_lookup_table.cc.o
[ 95%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/qlinear_pool.cc.o
[ 96%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/qlinear_softmax.cc.o
[ 96%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/qlinear_where.cc.o
[ 96%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/quant_gemm.cc.o
[ 96%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/sample.cc.o
[ 96%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc.o
[ 96%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/tensor/shrunken_gather.cc.o
[ 96%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/tokenizer.cc.o
[ 96%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/transformers/beam_search.cc.o
[ 97%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.cc.o
[ 97%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/transformers/beam_search_scorer.cc.o
[ 97%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/transformers/dump_tensor.cc.o
[ 97%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/transformers/generation_device_helper.cc.o
[ 97%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/transformers/greedy_search.cc.o
[ 97%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/transformers/greedy_search_parameters.cc.o
[ 97%] Building CXX object CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/transformers/logits_processor.cc.o
cc1plus: all warnings being treated as errors
CMakeFiles/onnxruntime_providers.dir/build.make:2234: recipe for target 'CMakeFiles/onnxruntime_providers.dir/Share/wip/onnxruntime-1.17.3/onnxruntime/contrib_ops/cpu/quantization/neural_speed_gemm.cc.o' failed