diff options
-rw-r--r-- | CMakeLists.txt | 125 | ||||
-rw-r--r-- | opus_functions.cmake | 48 |
2 files changed, 119 insertions, 54 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fb2a1123..a8d72366 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,7 @@ else()
   endif()
 endif()
 
+
 if(OPUS_CPU_X86 OR OPUS_CPU_X64)
   cmake_dependent_option(OPUS_X86_MAY_HAVE_SSE
                          "Does runtime check for SSE1 support"
@@ -77,6 +78,7 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
                          "AVX_SUPPORTED"
                          OFF)
 
+  # PRESUME depends on MAY HAVE, but PRESUME will override runtime detection
   if(OPUS_CPU_X64) # Assume 64 bit has SSE2 support
     cmake_dependent_option(OPUS_X86_PRESUME_SSE
                            "Assume target CPU has SSE1 support"
@@ -151,14 +153,14 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
                    "does runtime check for SSE4_1 support")
   add_feature_info(X86_MAY_HAVE_AVX OPUS_X86_MAY_HAVE_AVX
                    "does runtime check for AVX support")
-  add_feature_info(X86_PRESUME_SSE OPUS_X86_PRESUME_SSE
-                   "assume target CPU has SSE1 support")
-  add_feature_info(X86_PRESUME_SSE2 OPUS_X86_PRESUME_SSE2
-                   "assume target CPU has SSE2 support")
-  add_feature_info(X86_PRESUME_SSE4_1 OPUS_X86_PRESUME_SSE4_1
-                   "assume target CPU has SSE4_1 support")
-  add_feature_info(X86_PRESUME_AVX OPUS_X86_PRESUME_AVX
-                   "assume target CPU has AVX support")
+  add_feature_info(OPUS_X86_PRESUME_SSE OPUS_X86_PRESUME_SSE
+                   "assume target CPU has SSE1 support will override the runtime check")
+  add_feature_info(OPUS_X86_PRESUME_SSE2 OPUS_X86_PRESUME_SSE2
+                   "assume target CPU has SSE2 support will override the runtime check")
+  add_feature_info(OPUS_X86_PRESUME_SSE4_1 OPUS_X86_PRESUME_SSE4_1
+                   "assume target CPU has SSE4_1 support will override the runtime check")
+  add_feature_info(OPUS_X86_PRESUME_AVX OPUS_X86_PRESUME_AVX
+                   "assume target CPU has AVX support will override the runtime check")
 endif()
 
 feature_summary(WHAT ALL)
@@ -242,39 +244,100 @@ if(NOT OPUS_ENABLE_FLOAT_API)
   target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)
 endif()
 
-if(OPUS_X86_MAY_HAVE_SSE
-   OR OPUS_X86_MAY_HAVE_SSE2
-   OR OPUS_X86_MAY_HAVE_SSE4_1
-   OR OPUS_X86_MAY_HAVE_AVX)
+#[[Build flags for SSE are set in the following way:
+MSVC: if OPUS_X86_PRESUME_X is set then we set the highest possible /arch:X;
+we don't set any /arch flag for OPUS_X86_MAY_HAVE_SSE due to:
+https://randomascii.wordpress.com/2016/12/05/vc-archavx-option-unsafe-at-any-speed/
+Non-MSVC: for OPUS_X86_MAY_HAVE_X we set the compiler flags on a per-file basis;
+for OPUS_X86_PRESUME_X we set them for the whole target]]
+
+if((OPUS_X86_MAY_HAVE_SSE AND NOT OPUS_X86_PRESUME_SSE) OR
+   (OPUS_X86_MAY_HAVE_SSE2 AND NOT OPUS_X86_PRESUME_SSE2) OR
+   (OPUS_X86_MAY_HAVE_SSE4_1 AND NOT OPUS_X86_PRESUME_SSE4_1) OR
+   (OPUS_X86_MAY_HAVE_AVX AND NOT OPUS_X86_PRESUME_AVX))
   target_compile_definitions(opus PRIVATE OPUS_HAVE_RTCD)
 endif()
 
-if(OPUS_X86_MAY_HAVE_SSE)
-  add_sources_group(opus celt ${celt_sources_sse})
-  target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE)
-endif()
-if(OPUS_X86_PRESUME_SSE)
-  target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE)
+if(SSE1_SUPPORTED)
+  if(OPUS_X86_MAY_HAVE_SSE)
+    add_sources_group(opus celt ${celt_sources_sse})
+    target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE)
+    if(NOT MSVC)
+      set_source_files_properties(${celt_sources_sse} PROPERTIES COMPILE_FLAGS -msse)
+    endif()
+  endif()
+  if(OPUS_X86_PRESUME_SSE)
+    target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE)
+    if(NOT MSVC)
+      target_compile_options(opus PRIVATE -msse)
+    endif()
+  endif()
 endif()
 
-if(OPUS_X86_MAY_HAVE_SSE2)
-  add_sources_group(opus celt ${celt_sources_sse2})
-  target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
+if(SSE2_SUPPORTED)
+  if(OPUS_X86_MAY_HAVE_SSE2)
+    add_sources_group(opus celt ${celt_sources_sse2})
+    target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
+    if(NOT MSVC)
+      set_source_files_properties(${celt_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
+    endif()
+  endif()
+  if(OPUS_X86_PRESUME_SSE2)
+    target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE2)
+    if(NOT MSVC)
+      target_compile_options(opus PRIVATE -msse2)
+    endif()
+  endif()
 endif()
-if(OPUS_X86_PRESUME_SSE2)
-  target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE2)
+
+if(SSE4_1_SUPPORTED)
+  if(OPUS_X86_MAY_HAVE_SSE4_1)
+    add_sources_group(opus celt ${celt_sources_sse4_1})
+    add_sources_group(opus silk ${silk_sources_sse4_1})
+    target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
+    if(NOT MSVC)
+      set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
+    endif()
+
+    if(OPUS_FIXED_POINT)
+      add_sources_group(opus silk ${silk_sources_fixed_sse4_1})
+      if(NOT MSVC)
+        set_source_files_properties(${silk_sources_fixed_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
+      endif()
+    endif()
+  endif()
+  if(OPUS_X86_PRESUME_SSE4_1)
+    target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE4_1)
+    if(NOT MSVC)
+      target_compile_options(opus PRIVATE -msse4.1)
+    endif()
+  endif()
 endif()
 
-if(OPUS_X86_MAY_HAVE_SSE)
-  add_sources_group(opus celt ${celt_sources_sse4_1})
-  add_sources_group(opus silk ${silk_sources_sse4_1})
-  if(OPUS_FIXED_POINT)
-    add_sources_group(opus silk ${silk_sources_fixed_sse4_1})
+if(AVX_SUPPORTED)
+  # mostly a placeholder in case AVX intrinsics are added
+  if(OPUS_X86_MAY_HAVE_AVX)
+    target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX)
+  endif()
+  if(OPUS_X86_PRESUME_AVX)
+    target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX)
+    if(NOT MSVC)
+      target_compile_options(opus PRIVATE -mavx)
+    endif()
   endif()
-  target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
 endif()
-if(OPUS_X86_PRESUME_SSE4_1)
-  target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE4_1)
+
+if(MSVC)
+  if(AVX_SUPPORTED AND OPUS_X86_PRESUME_AVX) # on 64 bit and 32 bits
+    target_compile_options(opus PRIVATE /arch:AVX)
+  elseif(OPUS_CPU_X86) # if AVX not supported then set SSE flag
+    if((SSE4_1_SUPPORTED AND OPUS_X86_PRESUME_SSE4_1)
+       OR (SSE2_SUPPORTED AND OPUS_X86_PRESUME_SSE2))
+      target_compile_options(opus PRIVATE /arch:SSE2)
+    elseif(SSE1_SUPPORTED AND OPUS_X86_PRESUME_SSE)
+      target_compile_options(opus PRIVATE /arch:SSE)
+    endif()
+  endif()
 endif()
 
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "(armv7-a)")
diff --git a/opus_functions.cmake b/opus_functions.cmake
index 727601c4..04249ace 100644
--- a/opus_functions.cmake
+++ b/opus_functions.cmake
@@ -120,13 +120,17 @@ function(opus_detect_sse COMPILER_SUPPORT_SIMD)
       if(CMAKE_SIZEOF_VOID_P EQUAL 4)
         check_flag(SSE1 /arch:SSE)
       else()
-        set(SSE1_SUPPORTED 1 PARENT_SCOPE)
+        set(SSE1_SUPPORTED
+            1
+            PARENT_SCOPE)
       endif()
     else()
-      check_and_set_flag(SSE1 -msse)
+      check_flag(SSE1 -msse)
     endif()
   else()
-    set(SSE1_SUPPORTED 0 PARENT_SCOPE)
+    set(SSE1_SUPPORTED
+        0
+        PARENT_SCOPE)
   endif()
 
   check_include_file(emmintrin.h HAVE_EMMINTRIN_H) # SSE2
@@ -135,13 +139,17 @@ function(opus_detect_sse COMPILER_SUPPORT_SIMD)
       if(CMAKE_SIZEOF_VOID_P EQUAL 4)
         check_flag(SSE2 /arch:SSE2)
       else()
-        set(SSE2_SUPPORTED 1 PARENT_SCOPE)
+        set(SSE2_SUPPORTED
+            1
+            PARENT_SCOPE)
       endif()
     else()
-      check_and_set_flag(SSE2 -msse2)
+      check_flag(SSE2 -msse2)
     endif()
   else()
-    set(SSE2_SUPPORTED 0 PARENT_SCOPE)
+    set(SSE2_SUPPORTED
+        0
+        PARENT_SCOPE)
   endif()
 
   check_include_file(smmintrin.h HAVE_SMMINTRIN_H) # SSE4.1
@@ -150,13 +158,17 @@ function(opus_detect_sse COMPILER_SUPPORT_SIMD)
       if(CMAKE_SIZEOF_VOID_P EQUAL 4)
         check_flag(SSE4_1 /arch:SSE2) # SSE2 and above
       else()
-        set(SSE4_1_SUPPORTED 1 PARENT_SCOPE)
+        set(SSE4_1_SUPPORTED
+            1
+            PARENT_SCOPE)
       endif()
     else()
-      check_and_set_flag(SSE4_1 -msse4.1)
+      check_flag(SSE4_1 -msse4.1)
     endif()
   else()
-    set(SSE4_1_SUPPORTED 0 PARENT_SCOPE)
+    set(SSE4_1_SUPPORTED
+        0
+        PARENT_SCOPE)
   endif()
 
   check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX
@@ -164,22 +176,12 @@ function(opus_detect_sse COMPILER_SUPPORT_SIMD)
     if(MSVC)
       check_flag(AVX /arch:AVX)
     else()
-      check_and_set_flag(AVX -mavx)
+      check_flag(AVX -mavx)
     endif()
   else()
-    set(AVX_SUPPORTED 0 PARENT_SCOPE)
-  endif()
-
-  if(MSVC) # To avoid warning D9025 of overriding compiler options
-    if(AVX_SUPPORTED) # on 64 bit and 32 bits
-      add_definitions(/arch:AVX)
-    elseif(CMAKE_SIZEOF_VOID_P EQUAL 4) # if AVX not supported then set SSE flag
-      if(SSE4_1_SUPPORTED OR SSE2_SUPPORTED)
-        add_definitions(/arch:SSE2)
-      elseif(SSE1_SUPPORTED)
-        add_definitions(/arch:SSE)
-      endif()
-    endif()
+    set(AVX_SUPPORTED
+        0
+        PARENT_SCOPE)
   endif()
 
   if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX_SUPPORTED)