# 
#     Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
# 

# Compiler Configurations for native x86-64 targets
#
# Compiler configurations specific to x86 architecture
#

# TARGETARCH is declared here with a default value of 64.
variable TARGETARCH is default(64);

# TARGETCOMMON is set to three "-x <flag> <value>" pairs.  The numeric flag
# codes are internal compiler settings that this file does not explain.
set TARGETCOMMON=-x 70 0x8000 -x 122 1 -x 125 0x20000;

# -Mlarge_arrays by default
# (-x 68 0x1 is the same pair that the -Mlarge_arrays switch in this file appends.)
# NOTE(review): the -Mlarge_arrays switch appends the pair to F901ARGS and
# CGARGS, whereas the defaults here use F901ARGS and F902ARGS -- confirm that
# the difference is intended.
append F901ARGS=-x 68 0x1;
append F902ARGS=-x 68 0x1;

# Append -x 70 0x40000000 to F901ARGS and to CGARGS, wrapped in $if on $SSE3
# (presumably the pair is only emitted when SSE3 is set -- confirm $if semantics).
append F901ARGS=$if($SSE3,-x 70 0x40000000);

append CGARGS=$if($SSE3,-x 70 0x40000000);

# Unconditionally append -x 8 0x40000000 to F902ARGS and to CGARGS.
append F902ARGS=-x 8 0x40000000;
append CGARGS=-x 8 0x40000000;

# -fast: bundle of common optimization settings (summarised in the help text).
# It does not set OPTLEVEL directly; it applies max() to OPTLEVELDEF and
# OPTLEVELMIN with the value 2.  Compared with the -O2 switch in this file it
# additionally appends -x 42 0x14200000 to CGARGS, appends -x 46 4 to F902ARGS
# and CARGS, applies or(MVECTFASTFUSE=1), and includes -Mflushz in its
# shorthand list.
switch -fast is
	help(Common optimizations; includes -O2 -Munroll=c:1 -Mnoframe -Mlre -Mautoinline)
	helpgroup(opt)
	# Flags appended to CGARGS; the entry for -x 53 10 is commented out.
	append(CGARGS=
		-x 9 1			# -Munroll=c:1
		-x 42 0x14200000	# -Mlre=prefer - now that vectorization/lre works
		-x 136 0x11		# Branch prediction & BB placement
#		-x 53 10		# Pointer target analysis
	)
 	append(F901ARGS=
 		-x 53 2			# Pointer target analysis
 	)
    append(F902ARGS=-x 46 4)
	# Auto-inline arguments appended to CARGS.  The limits come from
	# $AUTOLEVELS, $AUTOMAXFUNCSIZE and $AUTOMAXTOTALSIZE, which are defined
	# outside this file.  The final -x 14 0x400000 is wrapped in $ifn on
	# $NOSTATIC (presumably dropped when NOSTATIC is set -- confirm).
	append(CARGS=-autoinl $AUTOLEVELS -x 168 $AUTOMAXFUNCSIZE -x 174 $AUTOMAXTOTALSIZE -x 14 0x200000 -x 46 4 $ifn($NOSTATIC, -x 14 0x400000))
	set(AUTOINLINE=YES)
        # Enable -Minline
        # (the two directives below are disabled, so -fast does not turn on -Minline)
        #append(INLINELIST=-insize $MAXFUNCSIZE -x 221 $MAXTOTALSIZE -x 222 $SMALLSIZE)
	#set(INLINE=YES)
	# Presumably max() raises the variable to at least 2 without lowering
	# a higher existing value -- confirm against the driver documentation.
	max(OPTLEVELDEF=2)	# -O2
	max(OPTLEVELMIN=2)	# -O2
        or(MVECTFASTFUSE=1)
	set(NOFRAME=YES)	# -Mnoframe
	# Switches that -fast expands to in addition to the settings above.
	shorthand(-Mvect=sse -Mcache_align -Mflushz -Mpre)
	append(OPTLEVELMINLIMIT=" -fast");

# -fastsse: shorthand for -fast, listed in the "opt" help group.
# It defines no help text and no settings of its own.
switch -fastsse is
	helpgroup(opt)
	shorthand(-fast)
	;

##switch -Opt is hide
##        max(OPTLEVELMIN=2)
##   the remaining settings are copied and pasted into -O2/-O3/-O4 instead of
##   using shorthand(-Opt)
##	append(CGARGS=
##		-x 9 1			# -Munroll=c:1
##		-x 72 0x1		# Scheduling enabled
##		-x 136 0x11		# Branch prediction & BB placement	
###		-x 53 10		# Pointer target analysis
##	)
## 	append(F901ARGS=
## 		-x 53 2			# Pointer target analysis
## 	)
##	append(CARGS=-autoinl $AUTOLEVELS -x 168 $AUTOMAXFUNCSIZE -x 174 $AUTOMAXTOTALSIZE -x 14 0x200000)
##	set(AUTOINLINE=YES)
##	set(NOFRAME=YES)	# -Mnoframe
##	shorthand(-Mvect=sse -Mcache_align -Mpre)
##	;

# -O0: sets OPTLEVEL to 0.  Declared with "replace", so it presumably
# overrides a definition of -O0 made elsewhere -- confirm.
replace switch -O0 is
	helpgroup(opt)
	help(Set opt level.  Creates a basic block for each statement.  No scheduling or global optimizations performed)
	set(OPTLEVEL=0);

# -O1: sets OPTLEVEL to 1.  Declared with "replace", so it presumably
# overrides a definition of -O1 made elsewhere -- confirm.
# The definition is closed by exactly one terminator, in the same layout as
# the -O0 and -O switches.
replace switch -O1 is
	help(Set opt level. Some scheduling and register allocation is enabled. No global optimizations performed)
	helpgroup(opt)
	set(OPTLEVEL=1)
	;

# -O: sets OPTLEVEL to 2, the same level that -O2 sets, but without the extra
# appends, AUTOINLINE/NOFRAME settings and shorthand that -O2 adds.
replace switch -O is
	helpgroup(opt)
	help(Set opt level.  All -O1 optimizations plus traditional scheduling and global scalar optimizations performed)
	set(OPTLEVEL=2);

# -O2: sets OPTLEVEL to 2 (the same level as -O) and additionally appends to
# CGARGS, F901ARGS and CARGS, sets AUTOINLINE and NOFRAME to YES, and expands
# to -Mvect=sse -Mcache_align -Mpre.
replace switch -O2 is
	help(Set opt level.  All -O optimizations plus SIMD code generation, cache alignment, and partial redundancy elimination performed)
	helpgroup(opt)
	set(OPTLEVEL=2)
##	shorthand(-Opt)
	# Flags appended to CGARGS; the entry for -x 53 10 is commented out.
	append(CGARGS=
		-x 9 1			# -Munroll=c:1
		-x 136 0x11		# Branch prediction & BB placement
#		-x 53 10		# Pointer target analysis
	)
 	append(F901ARGS=
 		-x 53 2			# Pointer target analysis
 	)
	# Auto-inline arguments appended to CARGS.  The limits come from
	# $AUTOLEVELS, $AUTOMAXFUNCSIZE and $AUTOMAXTOTALSIZE, which are defined
	# outside this file.  The final -x 14 0x400000 is wrapped in $ifn on
	# $NOSTATIC (presumably dropped when NOSTATIC is set -- confirm).
	append(CARGS=-autoinl $AUTOLEVELS -x 168 $AUTOMAXFUNCSIZE -x 174 $AUTOMAXTOTALSIZE -x 14 0x200000 $ifn($NOSTATIC, -x 14 0x400000))
	set(AUTOINLINE=YES)
	set(NOFRAME=YES)	# -Mnoframe
	# Switches that -O2 expands to in addition to the settings above.
	shorthand(-Mvect=sse -Mcache_align -Mpre)
	;

# -O3: sets OPTLEVEL to 3.  The appends, AUTOINLINE/NOFRAME settings and
# shorthand list are the same as those of the -O2 switch in this file; only
# the help text and the OPTLEVEL value differ.
replace switch -O3 is
	help(Set opt level. All -O2 optimizations plus more aggressive code hoisting and scalar replacement, that may or may not be profitable, performed)
	helpgroup(opt)
	set(OPTLEVEL=3)
##	shorthand(-Opt)
	# Flags appended to CGARGS; the entry for -x 53 10 is commented out.
	append(CGARGS=
		-x 9 1			# -Munroll=c:1
		-x 136 0x11		# Branch prediction & BB placement
#		-x 53 10		# Pointer target analysis
	)
 	append(F901ARGS=
 		-x 53 2			# Pointer target analysis
 	)
	# Auto-inline arguments appended to CARGS.  The limits come from
	# $AUTOLEVELS, $AUTOMAXFUNCSIZE and $AUTOMAXTOTALSIZE, which are defined
	# outside this file.  The final -x 14 0x400000 is wrapped in $ifn on
	# $NOSTATIC (presumably dropped when NOSTATIC is set -- confirm).
	append(CARGS=-autoinl $AUTOLEVELS -x 168 $AUTOMAXFUNCSIZE -x 174 $AUTOMAXTOTALSIZE -x 14 0x200000 $ifn($NOSTATIC, -x 14 0x400000))
	set(AUTOINLINE=YES)
	set(NOFRAME=YES)	# -Mnoframe
	# Switches that -O3 expands to in addition to the settings above.
	shorthand(-Mvect=sse -Mcache_align -Mpre)
	;

# -O4: sets OPTLEVEL to 4.  The appends, AUTOINLINE/NOFRAME settings and
# shorthand list are the same as those of the -O2 and -O3 switches in this
# file; only the help text and the OPTLEVEL value differ.
replace switch -O4 is
	help(Set opt level. All -O3 optimizations plus more aggressive hoisting of guarded expressions performed)
	helpgroup(opt)
	set(OPTLEVEL=4)
##	shorthand(-Opt)
	# Flags appended to CGARGS; the entry for -x 53 10 is commented out.
	append(CGARGS=
		-x 9 1			# -Munroll=c:1
		-x 136 0x11		# Branch prediction & BB placement
#		-x 53 10		# Pointer target analysis
	)
 	append(F901ARGS=
 		-x 53 2			# Pointer target analysis
 	)
	# Auto-inline arguments appended to CARGS.  The limits come from
	# $AUTOLEVELS, $AUTOMAXFUNCSIZE and $AUTOMAXTOTALSIZE, which are defined
	# outside this file.  The final -x 14 0x400000 is wrapped in $ifn on
	# $NOSTATIC (presumably dropped when NOSTATIC is set -- confirm).
	append(CARGS=-autoinl $AUTOLEVELS -x 168 $AUTOMAXFUNCSIZE -x 174 $AUTOMAXTOTALSIZE -x 14 0x200000 $ifn($NOSTATIC, -x 14 0x400000))
	set(AUTOINLINE=YES)
	set(NOFRAME=YES)	# -Mnoframe
	# Switches that -O4 expands to in addition to the settings above.
	shorthand(-Mvect=sse -Mcache_align -Mpre)
	;

# -Mlarge_arrays: appends -x 68 0x1 to F901ARGS and to CGARGS.
# Listed in help as -M[no]large_arrays in the "target" group.
switch -Mlarge_arrays is
	helpgroup(target)
	helpname(-M[no]large_arrays)
	help(Support for large arrays)
	append(F901ARGS=-x 68 0x1)
	append(CGARGS=-x 68 0x1)
	;

# -Mnolarge_arrays: hidden switch that performs no actions.  It carries only
# a help message saying the option is not supported on this target.
switch -Mnolarge_arrays is hide
	helpgroup(language)
	help(-Mnolarge_arrays is not supported on this target)
	;

# -Mcache_align: appends -quad -x 119 0x10000000 to F901ARGS, and the same
# arguments plus -x 129 0x40000000 to CGARGS.  Listed in the "opt" help group.
switch -Mcache_align is
	helpgroup(opt)
	help(Align large objects on cache-line boundaries)
	append(F901ARGS=-quad -x 119 0x10000000)
	append(CGARGS=-quad -x 119 0x10000000 -x 129 0x40000000)
	;

# -Mnocache_align: hidden counterpart of -Mcache_align.  It appends
# -y 119 0x10000000 to F901ARGS, and that pair plus -y 129 0x40000000 to
# CGARGS.  These are the flag/value pairs that -Mcache_align passes with -x;
# presumably -y clears what -x sets -- confirm.
switch -Mnocache_align is hide
	help(No special cache-line alignment of large objects)
	helpgroup(opt)
#  -quad is default for 64-bit, so do not pass -noquad
	append(F901ARGS=-y 119 0x10000000)
	append(CGARGS=-y 119 0x10000000 -y 129 0x40000000);

# -Mvarargs: appends -x 125 0x200 to CGARGS.
# Listed in help as -M[no]varargs in the "target" group.
switch -Mvarargs is
	help(Calls from Fortran to C will use varargs calling sequence)
	helpname(-M[no]varargs)
	helpgroup(target)
	append(CGARGS=-x 125 0x200)
	;

# -Mnovarargs: hidden counterpart of -Mvarargs.  It appends -y 125 0x200 to
# CGARGS, the same flag/value pair that -Mvarargs passes with -x.
switch -Mnovarargs is hide
	help(Calls from Fortran to C will use normal calling sequence)
	helpgroup(target)
	append(CGARGS=-y 125 0x200)
	;
