libgo patch committed: Update to weekly.2011-11-02

2011-12-02 Thread Ian Lance Taylor
I have updated the libgo library to the weekly.2011-11-02 release of the
master library.  The only changes in this patch are to switch from using
the type os.Error to the new predeclared type error, and to add a new
errors package providing a couple of convenience functions.  This causes
mechanical changes to over 450 files in the library.  I have not
included the complete diffs in this e-mail message; they are available
from SVN or from the master library.  I have only included the diffs to
files outside the Go library proper.  Bootstrapped and ran Go testsuite
on x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r c05039b68dc2 libgo/MERGE
--- a/libgo/MERGE	Fri Dec 02 16:13:57 2011 -0800
+++ b/libgo/MERGE	Fri Dec 02 17:54:15 2011 -0800
@@ -1,4 +1,4 @@
-941b8015061a
+780c85032b17
 
 The first line of this file holds the Mercurial revision number of the
 last merge done from the master library sources.
diff -r c05039b68dc2 libgo/Makefile.am
--- a/libgo/Makefile.am	Fri Dec 02 16:13:57 2011 -0800
+++ b/libgo/Makefile.am	Fri Dec 02 17:54:15 2011 -0800
@@ -107,6 +107,7 @@
 	cmath.gox \
 	crypto.gox \
 	csv.gox \
+	errors.gox \
 	exec.gox \
 	expvar.gox \
 	flag.gox \
@@ -563,6 +564,9 @@
 	go/csv/reader.go \
 	go/csv/writer.go
 
+go_errors_files = \
+	go/errors/errors.go
+
 go_exec_files = \
 	go/exec/exec.go \
 	go/exec/lp_unix.go
@@ -1623,6 +1627,7 @@
 	cmath/cmath.lo \
 	crypto/crypto.lo \
 	csv/csv.lo \
+	errors/errors.lo \
 	exec/exec.lo \
 	expvar/expvar.lo \
 	flag/flag.lo \
@@ -1944,6 +1949,15 @@
 	@$(CHECK)
 .PHONY: csv/check
 
+@go_include@ errors/errors.lo.dep
+errors/errors.lo.dep: $(go_errors_files)
+	$(BUILDDEPS)
+errors/errors.lo: $(go_errors_files)
+	$(BUILDPACKAGE)
+errors/check: $(CHECK_DEPS)
+	@$(CHECK)
+.PHONY: errors/check
+
 @go_include@ exec/exec.lo.dep
 exec/exec.lo.dep: $(go_exec_files)
 	$(BUILDDEPS)
@@ -3445,6 +3459,8 @@
 	$(BUILDGOX)
 csv.gox: csv/csv.lo
 	$(BUILDGOX)
+errors.gox: errors/errors.lo
+	$(BUILDGOX)
 exec.gox: exec/exec.lo
 	$(BUILDGOX)
 expvar.gox: expvar/expvar.lo
@@ -3791,6 +3807,7 @@
 	bytes/check \
 	cmath/check \
 	csv/check \
+	errors/check \
 	exec/check \
 	expvar/check \
 	flag/check \
diff -r c05039b68dc2 libgo/merge.sh
--- a/libgo/merge.sh	Fri Dec 02 16:13:57 2011 -0800
+++ b/libgo/merge.sh	Fri Dec 02 17:54:15 2011 -0800
@@ -121,6 +121,21 @@
   fi
 }
 
+merge_c() {
+  from=$1
+  to=$2
+  oldfile=${OLDDIR}/src/pkg/runtime/$from
+  if test -f ${oldfile}; then
+sed -e 's/·/_/g' < ${oldfile} > ${oldfile}.tmp
+oldfile=${oldfile}.tmp
+newfile=${NEWDIR}/src/pkg/runtime/$from
+sed -e 's/·/_/g' < ${newfile} > ${newfile}.tmp
+newfile=${newfile}.tmp
+libgofile=runtime/$to
+merge $from ${oldfile} ${newfile} ${libgofile}
+  fi
+}
+
 (cd ${NEWDIR}/src/pkg && find . -name '*.go' -print) | while read f; do
   if test `dirname $f` = "./syscall"; then
 continue
@@ -153,32 +168,11 @@
 
 runtime="chan.c cpuprof.c goc2c.c lock_futex.c lock_sema.c mcache.c mcentral.c mfinal.c mfixalloc.c mgc0.c mheap.c msize.c proc.c runtime.c runtime.h malloc.h malloc.goc mprof.goc runtime1.goc sema.goc sigqueue.goc string.goc"
 for f in $runtime; do
-  oldfile=${OLDDIR}/src/pkg/runtime/$f
-  if test -f ${oldfile}; then
-sed -e 's/·/_/g' < ${oldfile} > ${oldfile}.tmp
-oldfile=${oldfile}.tmp
-newfile=${NEWDIR}/src/pkg/runtime/$f
-sed -e 's/·/_/g' < ${newfile} > ${newfile}.tmp
-newfile=${newfile}.tmp
-libgofile=runtime/$f
-merge $f ${oldfile} ${newfile} ${libgofile}
-  fi
+  merge_c $f $f
 done
 
-runtime2="linux/thread.c thread-linux.c linux/mem.c mem.c"
-echo $runtime2 | while read from; do
-  read to
-  oldfile=${OLDDIR}/src/pkg/runtime/$from
-  if test -f ${oldfile}; then
-sed -e 's/·/_/g' < ${oldfile} > ${oldfile}.tmp
-oldfile=${oldfile}.tmp
-newfile=${NEWDIR}/src/pkg/runtime/$from
-sed -e 's/·/_/g' < ${newfile} > ${newfile}.tmp
-newfile=${newfile}.tmp
-libgofile=runtime/$to
-merge $f ${oldfile} ${newfile} ${libgofile}
-  fi
-done
+merge_c linux/thread.c thread-linux.c
+merge_c linux/mem.c mem.c
 
 (cd ${OLDDIR}/src/pkg && find . -name '*.go' -print) | while read f; do
   oldfile=${OLDDIR}/src/pkg/$f
diff -r c05039b68dc2 libgo/testsuite/gotest
--- a/libgo/testsuite/gotest	Fri Dec 02 16:13:57 2011 -0800
+++ b/libgo/testsuite/gotest	Fri Dec 02 17:54:15 2011 -0800
@@ -346,7 +346,6 @@
 		echo 'import "./_xtest_"'
 	fi
 	echo 'import "testing"'
-	echo 'import __os__ "os"' # rename in case tested package is called os
 	echo 'import __regexp__ "regexp"' # rename in case tested package is called regexp
 	# test array
 	echo
@@ -385,7 +384,7 @@
 var matchPat string
 var matchRe *__regexp__.Regexp
 
-func matchString(pat, str string) (result bool, err __os__.Error) {
+func matchString(pat, str string) (result bool, err error) {
 	if matchRe == nil || matchPat != pat {
 		matchPat = pat
 		matchRe, err = __regexp__.Compile(matchPat)


Re: [libstdc++] doc/xml/manual/abi.xml -- fix references to GCC as well as GNU/Linux

2011-12-02 Thread Jonathan Wakely
On 27 November 2011 00:24, Gerald Pfeifer wrote:
>
> On the way I spotted an odd reference to GCC.  Looking at the
> overall document, it occurs to me that
>
>  - newer versions of GCC are not covered, and
>  - references to GCC generally are of the form gcc-X.Y instead of GCC X.Y.
>
> Is this something one of you guys (libstdc++) could have a look at?

How's this?  I think I got all the versions and dates correct, but I
must say I find keeping some of this info in the manual to be tedious
and unnecessary.

For an unnecessary example, these days the value of __GLIBCXX__ is the
date a release was made, available from e.g.
http://gcc.gnu.org/gcc-4.6/

To deal with the tedious parts, I changed a few repetitive instances
of 4.1.0, 4.1.1, 4.2.0, 4.2.1, 4.3.0 etc. etc. to just 4.x.x which
will be accurate in future and can be changed if it needs to be,
rather than having to keep adding new entries that say the headers for
GCC 4.6.1 are in include/c++/4.6.1 and, guess what, the headers for
GCC 4.6.2 are in include/c++/4.6.2

Would 4.*.* or 4.?.? be better than 4.x.x?

I'm not sure why we need to explicitly state the libgcc soname for
every release when it's always the same.

If no one objects to this approach I'll regenerate the HTML pages and
check this in at some point in the next few days.

If anyone objects, please find a volunteer to keep the tedious version
up to date ;-)
Index: doc/xml/manual/abi.xml
===
--- doc/xml/manual/abi.xml  (revision 181390)
+++ doc/xml/manual/abi.xml  (working copy)
@@ -164,28 +164,28 @@ compatible.
 
 
 
-gcc-3.0.0: libgcc_s.so.1
-gcc-3.0.1: libgcc_s.so.1
-gcc-3.0.2: libgcc_s.so.1
-gcc-3.0.3: libgcc_s.so.1
-gcc-3.0.4: libgcc_s.so.1
-gcc-3.1.0: libgcc_s.so.1
-gcc-3.1.1: libgcc_s.so.1
-gcc-3.2.0: libgcc_s.so.1
-gcc-3.2.1: libgcc_s.so.1
-gcc-3.2.2: libgcc_s.so.1
-gcc-3.2.3: libgcc_s.so.1
-gcc-3.3.0: libgcc_s.so.1
-gcc-3.3.1: libgcc_s.so.1
-gcc-3.3.2: libgcc_s.so.1
-gcc-3.3.3: libgcc_s.so.1
-gcc-3.4.x, gcc-4.[0-5].x: libgcc_s.so.1
+GCC 3.0.0: libgcc_s.so.1
+GCC 3.0.1: libgcc_s.so.1
+GCC 3.0.2: libgcc_s.so.1
+GCC 3.0.3: libgcc_s.so.1
+GCC 3.0.4: libgcc_s.so.1
+GCC 3.1.0: libgcc_s.so.1
+GCC 3.1.1: libgcc_s.so.1
+GCC 3.2.0: libgcc_s.so.1
+GCC 3.2.1: libgcc_s.so.1
+GCC 3.2.2: libgcc_s.so.1
+GCC 3.2.3: libgcc_s.so.1
+GCC 3.3.0: libgcc_s.so.1
+GCC 3.3.1: libgcc_s.so.1
+GCC 3.3.2: libgcc_s.so.1
+GCC 3.3.3: libgcc_s.so.1
+GCC 3.4.x, GCC 4.x.x: libgcc_s.so.1
 
 
 For m68k-linux the versions differ as follows: 
 
 
-gcc-3.4.x, gcc-4.[0-5].x: libgcc_s.so.1
+GCC 3.4.x, GCC 4.x.x: libgcc_s.so.1
 when configuring --with-sjlj-exceptions, or
 libgcc_s.so.2  
 
@@ -193,10 +193,10 @@ compatible.
 For hppa-linux the versions differ as follows: 
 
 
-gcc-3.4.x, gcc-4.[0-1].x: either libgcc_s.so.1
+GCC 3.4.x, GCC 4.[0-1].x: either libgcc_s.so.1
 when configuring --with-sjlj-exceptions, or
 libgcc_s.so.2  
-gcc-4.[2-5].x: either libgcc_s.so.3 when configuring
+GCC 4.[2-7].x: either libgcc_s.so.3 when configuring
 --with-sjlj-exceptions) or libgcc_s.so.4
  
 
@@ -213,19 +213,22 @@ compatible.
 
 This corresponds to the mapfile: gcc/libgcc-std.ver
 
-gcc-3.0.0: GCC_3.0
-gcc-3.3.0: GCC_3.3
-gcc-3.3.1: GCC_3.3.1
-gcc-3.3.2: GCC_3.3.2
-gcc-3.3.4: GCC_3.3.4
-gcc-3.4.0: GCC_3.4
-gcc-3.4.2: GCC_3.4.2
-gcc-3.4.4: GCC_3.4.4
-gcc-4.0.0: GCC_4.0.0
-gcc-4.1.0: GCC_4.1.0
-gcc-4.2.0: GCC_4.2.0
-gcc-4.3.0: GCC_4.3.0
-gcc-4.4.0: GCC_4.4.0
+GCC 3.0.0: GCC_3.0
+GCC 3.3.0: GCC_3.3
+GCC 3.3.1: GCC_3.3.1
+GCC 3.3.2: GCC_3.3.2
+GCC 3.3.4: GCC_3.3.4
+GCC 3.4.0: GCC_3.4
+GCC 3.4.2: GCC_3.4.2
+GCC 3.4.4: GCC_3.4.4
+GCC 4.0.0: GCC_4.0.0
+GCC 4.1.0: GCC_4.1.0
+GCC 4.2.0: GCC_4.2.0
+GCC 4.3.0: GCC_4.3.0
+GCC 4.4.0: GCC_4.4.0
+GCC 4.5.0: GCC_4.5.0
+GCC 4.6.0: GCC_4.6.0
+GCC 4.7.0: GCC_4.7.0
 
 
 
@@ -246,49 +249,62 @@ compatible.
 It is versioned as follows:
 
 
-gcc-3.0.0: libstdc++.so.3.0.0
-gcc-3.0.1: libstdc++.so.3.0.1
-gcc-3.0.2: libstdc++.so.3.0.2
-gcc-3.0.3: libstdc++.so.3.0.2 (See Note 
1)
-gcc-3.0.4: libstdc++.so.3.0.4
-gcc-3.1.0: libstdc++.so.4.0.0 (Incompatible with 
previous)
-gcc-3.1.1: libstdc++.so.4.0.1
-gcc-3.2.0: libstdc++.so.5.0.0 (Incompatible with 
previous)
-gcc-3.2.1: libstdc++.so.5.0.1
-gcc-3.2.2: libstdc++.so.5.0.2
-gcc-3.2.3: libstdc++.so.5.0.3 (See Note 
2)
-gcc-3.3.0: libstdc++.so.5.0.4
-gcc-3.3.1: libstdc++.so.5.0.5
-gcc-3.3.2: libstdc++.so.5.0.5
-gcc-3.3.3: libstdc++.so.5.0.5
-gcc-3.4.0: libstdc++.so.6.0.0 (Incompatible with 
previous)
-gcc-3.4.1: libstdc++.so.6.0.1
-gcc-3.4.2: libstdc++.so.6.0.2
-gcc-3.4.3: libs

[patch] PR51347 alias problem

2011-12-02 Thread Patrick Marlier

Hi,

PR51347 exposes a problem caused by the TM IPA rework.
tree_function_versioning segfaults because the cfg of old_decl (an alias) is
NULL.
Indeed, an alias can get called, but the tm cg data are in the parent of the
alias.


Bootstrapped and regtested.

Thanks,
Patrick.

ChangeLog
2011-12-02  Patrick Marlier  

PR c++/51347
* trans-mem.c (ipa_tm_scan_calls_block): Use parent node of 
aliases.

(ipa_tm_decrement_clone_counts): Likewise.

testsuite/ChangeLog
2011-12-02  Patrick Marlier  

PR c++/51347
* g++.dg/tm/pr51347.C: New test.
* g++.dg/tm/ctor-used.C: New test.
Index: testsuite/g++.dg/tm/ctor-used.C
===
--- testsuite/g++.dg/tm/ctor-used.C	(revision 0)
+++ testsuite/g++.dg/tm/ctor-used.C	(revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm -fdump-tree-optimized" } */
+
+struct C {
+  long l;
+  C():l(0) {}
+};
+
+int main()
+{
+  C* alloc;
+  __transaction_atomic {
+alloc = new C;
+  }
+  alloc->l = 2;
+
+  return 0;
+}
+/* { dg-final { scan-assembler-not "_ITM_getTMCloneOrIrrevocable" } } */
+/* { dg-final { scan-tree-dump-times ";; Function C::C" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: testsuite/g++.dg/tm/pr51347.C
===
--- testsuite/g++.dg/tm/pr51347.C	(revision 0)
+++ testsuite/g++.dg/tm/pr51347.C	(revision 0)
@@ -0,0 +1,37 @@
+// { dg-do compile }
+// { dg-options "-fgnu-tm -O1" }
+
+template
+class BugContainer {
+public:
+BugContainer() {
+elem = new ValueType();
+}
+
+virtual ~BugContainer() {
+delete elem;
+}
+
+void bug() {
+delete elem;
+elem = new ValueType();
+}
+
+protected:
+ValueType *elem;
+};
+
+struct Info {
+BugContainer vec;
+};
+
+int main(int argc, char *argv[])
+{
+BugContainer bugs;
+
+__transaction_atomic {
+  bugs.bug();
+}
+
+return 0;
+}
Index: trans-mem.c
===
--- trans-mem.c	(revision 181888)
+++ trans-mem.c	(working copy)
@@ -3566,6 +3566,10 @@ ipa_tm_scan_calls_block (cgraph_node_queue *callee
 
 	  node = cgraph_get_node (fndecl);
 	  gcc_assert (node != NULL);
+
+	  if (node->alias)
+		node = cgraph_get_node (node->thunk.alias);
+
 	  d = get_cg_data (node);
 
 	  pcallers = (for_clone ? &d->tm_callers_clone
@@ -3849,13 +3853,20 @@ ipa_tm_decrement_clone_counts (basic_block bb, boo
 	{
 	  struct tm_ipa_cg_data *d;
 	  unsigned *pcallers;
+	  struct cgraph_node *node;
 
 	  if (is_tm_ending_fndecl (fndecl))
 		continue;
 	  if (find_tm_replacement_function (fndecl))
 		continue;
 
-	  d = get_cg_data (cgraph_get_node (fndecl));
+	  node = cgraph_get_node (fndecl);
+	  gcc_assert (node != NULL);
+
+	  if (node->alias)
+		node = cgraph_get_node (node->thunk.alias);
+
+	  d = get_cg_data (node);
 	  pcallers = (for_clone ? &d->tm_callers_clone
 			  : &d->tm_callers_normal);
 
@@ -4179,7 +4190,7 @@ struct create_version_alias_info
   tree new_decl;
 };
 
-/* A subrontine of ipa_tm_create_version, called via
+/* A subroutine of ipa_tm_create_version, called via
cgraph_for_node_and_aliases.  Create new tm clones for each of
the existing aliases.  */
 static bool


Re: rs6000 options change for rtems.h

2011-12-02 Thread Joseph S. Myers
On Fri, 2 Dec 2011, Joel Sherrill wrote:

> OK.  I obviously read too much into the other uses.
> I did not intend to change semantics just account for
> the change making this not compile.
> 
> How does the new version look?

This version is OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


libgo patch committed: Generate dependencies automatically

2011-12-02 Thread Ian Lance Taylor
This patch adds automatic dependency generation to the packages in
libgo.  This saves me from having to fix all the dependencies each time
I import a new version of the library.  As can be seen in this patch, I
really need to learn how to use autogen.  Bootstrapped and ran Go
testsuite on x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

diff -r 0d38523a2d7f libgo/Makefile.am
--- a/libgo/Makefile.am	Fri Dec 02 11:33:22 2011 -0800
+++ b/libgo/Makefile.am	Fri Dec 02 16:12:19 2011 -0800
@@ -1801,6 +1801,12 @@
 GOLINK = $(LIBTOOL) --tag GO --mode-link $(GOC) \
 	$(OPT_LDFLAGS) $(SECTION_LDFLAGS) $(AM_GOCFLAGS) $(LTLDFLAGS) -o $@
 
+# Build the dependencies for a Go package.
+BUILDDEPS = \
+	$(MKDIR_P) $(@D); \
+	$(SHELL) $(srcdir)/godeps.sh `echo $@ | sed -e 's/.dep$$//'` $^ > $@.tmp; \
+	mv -f $@.tmp $@
+
 # Build the .go files for a package, generating a .lo file.
 BUILDPACKAGE = \
 	$(MKDIR_P) $(@D); \
@@ -1873,27 +1879,37 @@
 	$(toolexeclibgosync_DATA) \
 	$(toolexeclibgotesting_DATA)
 
-asn1/asn1.lo: $(go_asn1_files) big.gox bytes.gox fmt.gox io.gox os.gox \
-		reflect.gox strconv.gox strings.gox time.gox
+@go_include@ asn1/asn1.lo.dep
+asn1/asn1.lo.dep: $(go_asn1_files)
+	$(BUILDDEPS)
+asn1/asn1.lo: $(go_asn1_files)
 	$(BUILDPACKAGE)
 asn1/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: asn1/check
 
-big/big.lo: $(go_big_files) encoding/binary.gox fmt.gox io.gox os.gox \
-		rand.gox strings.gox
+@go_include@ big/big.lo.dep
+big/big.lo.dep: $(go_big_files)
+	$(BUILDDEPS)
+big/big.lo: $(go_big_files)
 	$(BUILDPACKAGE)
 big/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: big/check
 
-bufio/bufio.lo: $(go_bufio_files) bytes.gox io.gox os.gox strconv.gox utf8.gox
+@go_include@ bufio/bufio.lo.dep
+bufio/bufio.lo.dep: $(go_bufio_files)
+	$(BUILDDEPS)
+bufio/bufio.lo: $(go_bufio_files)
 	$(BUILDPACKAGE)
 bufio/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: bufio/check
 
-bytes/bytes.lo: $(go_bytes_files) io.gox os.gox unicode.gox utf8.gox
+@go_include@ bytes/bytes.lo.dep
+bytes/bytes.lo.dep: $(go_bytes_files)
+	$(BUILDDEPS)
+bytes/bytes.lo: $(go_bytes_files)
 	$(BUILDPACKAGE)
 bytes/index.lo: $(go_bytes_c_files) bytes/bytes.lo
 	$(LTCOMPILE) -c -o bytes/index.lo $(srcdir)/go/bytes/indexbyte.c
@@ -1901,142 +1917,181 @@
 	@$(CHECK)
 .PHONY: bytes/check
 
-cmath/cmath.lo: $(go_cmath_files) math.gox
+@go_include@ cmath/cmath.lo.dep
+cmath/cmath.lo.dep: $(go_cmath_files)
+	$(BUILDDEPS)
+cmath/cmath.lo: $(go_cmath_files)
 	$(BUILDPACKAGE)
 cmath/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: cmath/check
 
-crypto/crypto.lo: $(go_crypto_files) hash.gox
+@go_include@ crypto/crypto.lo.dep
+crypto/crypto.lo.dep: $(go_crypto_files)
+	$(BUILDDEPS)
+crypto/crypto.lo: $(go_crypto_files)
 	$(BUILDPACKAGE)
 crypto/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: crypto/check
 
-csv/csv.lo: $(go_csv_files) bufio.gox bytes.gox fmt.gox io.gox os.gox \
-		strings.gox unicode.gox utf8.gox
+@go_include@ csv/csv.lo.dep
+csv/csv.lo.dep: $(go_csv_files)
+	$(BUILDDEPS)
+csv/csv.lo: $(go_csv_files)
 	$(BUILDPACKAGE)
 csv/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: csv/check
 
-exec/exec.lo: $(go_exec_files) bytes.gox io.gox os.gox strconv.gox \
-		strings.gox syscall.gox
+@go_include@ exec/exec.lo.dep
+exec/exec.lo.dep: $(go_exec_files)
+	$(BUILDDEPS)
+exec/exec.lo: $(go_exec_files)
 	$(BUILDPACKAGE)
 exec/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: exec/check
 
-expvar/expvar.lo: $(go_expvar_files) bytes.gox fmt.gox http.gox json.gox \
-		log.gox os.gox runtime.gox strconv.gox sync.gox
+@go_include@ expvar/expvar.lo.dep
+expvar/expvar.lo.dep: $(go_expvar_files)
+	$(BUILDDEPS)
+expvar/expvar.lo: $(go_expvar_files)
 	$(BUILDPACKAGE)
 expvar/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: expvar/check
 
-flag/flag.lo: $(go_flag_files) fmt.gox os.gox strconv.gox
+@go_include@ flag/flag.lo.dep
+flag/flag.lo.dep: $(go_flag_files)
+	$(BUILDDEPS)
+flag/flag.lo: $(go_flag_files)
 	$(BUILDPACKAGE)
 flag/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: flag/check
 
-fmt/fmt.lo: $(go_fmt_files) bytes.gox io.gox math.gox os.gox reflect.gox \
-		strconv.gox strings.gox sync.gox unicode.gox utf8.gox
+@go_include@ fmt/fmt.lo.dep
+fmt/fmt.lo.dep: $(go_fmt_files)
+	$(BUILDDEPS)
+fmt/fmt.lo: $(go_fmt_files)
 	$(BUILDPACKAGE)
 fmt/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: fmt/check
 
-gob/gob.lo: $(go_gob_files) bufio.gox bytes.gox fmt.gox io.gox math.gox \
-		os.gox reflect.gox runtime.gox strings.gox sync.gox \
-		unicode.gox utf8.gox
+@go_include@ gob/gob.lo.dep
+gob/gob.lo.dep: $(go_gob_files)
+	$(BUILDDEPS)
+gob/gob.lo: $(go_gob_files)
 	$(BUILDPACKAGE)
 gob/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: gob/check
 
-hash/hash.lo: $(go_hash_files) io.gox
+@go_include@ hash/hash.lo.dep
+hash/hash.lo.dep: $(go_hash_files)
+	$(BUILDDEPS)
+hash/hash.lo: $(go_hash_files)
 	$(BUILDPACKAGE)
 hash/check: $(CHECK_DEPS)
 	@$(CHECK)
 .PHONY: hash/check
 
-html/html.lo: $(go_html_files) bufio.gox bytes.gox fmt.gox io.gox os.gox \
-		strconv.gox strings.gox utf8.gox
+@go_include@ html/html.lo.dep
+html/html.l

Re: [PATCH] Implement stap probe on ARM's unwinder

2011-12-02 Thread Richard Henderson
On 12/02/2011 04:25 AM, Bernd Schmidt wrote:
> Doesn't look like it would cause problems. I have no idea what
> builtin_frob_return_addr does but it appears to exist everywhere.

It's for adjusting the return address in magic ways.  E.g. Sparc pc+8 for 
structure returns, ARM low bit for thumb returns.  I.e. whatever the target
needs for EH handling.


r~


Re: [RFC] Port libitm to powerpc

2011-12-02 Thread Iain Sandoe

Hi Richard,

On 2 Dec 2011, at 22:59, Richard Henderson wrote:


On 12/02/2011 05:37 AM, Iain Sandoe wrote:
Richard: things that I did, intentionally, differently (and I'm not  
sure are correct).


1. I saved the CR
2. Once the vrs are saved, I update the VRsave mask to reflect that.


Don't update VRsave.  This gives the OS license to clobber those  
registers on task switch, which means that you'd need to reload all  
of those registers and restore VRsave after calling  
_GTM_begin_transaction.


thanks (will fix when updating).


+   mffs f0
+   stfd f31,-8(r1)


You didn't actually save FPSCRS, only copied it to f0.


it's saved after the stack is updated - because of being outside the  
red zone.



+#ifdef __ppc64__
+  double fpscr;/* ??? should we save this.  */
+  unsigned int vscrpad;
+  unsigned int vscr;   /* VRsave */
+#else
+  double fpscr;/* ??? should we save this.  */
+  unsigned int vscrpad[2]; 
+  unsigned int vscr;   /* VRsave */
+#endif


Why the difference?


...because the VRsave reg is included in the red-zone for m32 but not  
for m64...


it also makes the alignment padding explicit ...


+  unsigned int cr; /* Saved CR.  */
+#ifdef __ppc64__
+  unsigned int crpad;
+#endif


Why not just make it unsigned long and be done with it?  There's  
nothing saying that you can't use lg/stg for the gp that holds the  
crs.  Nothing except for your corresponding longjmp cares about the  
layout.


yeah .. it got like that because of ...

I personally think the whole thing would be much easier to read  
without relying on the redzone.  Aside from that, there's actually  
very little real difference in the two files.  Essentially, you're  
storing the registers in a different order because the prologue  
does, just so you can make use of the redzone.


OK -  I guess I got carried away with thinking that I might be able to  
re-use the save_world () routine - but that doesn't look feasible  
after all so


The aix abi saves r2; darwin 32-bit saves r13.  One extra register  
in both cases, which could use the same slot.


... will take another look tomorrow
 although we still have some syntax issues that might make sharing  
the original code somewhat ugly




... in the meantime, I found two obvious stupid typos in my current  
version - the attached runs without any (unexpected) fails;


Iain

Native configuration is powerpc-apple-darwin9

=== libitm tests ===

Schedule of variations:
unix/-m32
unix/-m64

Running target unix/-m32
Using /usr/local/dejagnu-1-4-4/share/dejagnu/baseboards/unix.exp as  
board description file for target.
Using /usr/local/dejagnu-1-4-4/share/dejagnu/config/unix.exp as  
generic interface file for target.
Using /GCC/gcc-live-trunk/libitm/testsuite/config/default.exp as tool- 
and-target-specific interface file.

Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c/c.exp ...
Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c++/c++.exp ...
WARNING: libitm.c++/static_ctor.C compilation failed to produce  
executable


=== libitm Summary for unix/-m32 ===

# of expected passes23
# of expected failures  5
Running target unix/-m64
Using /usr/local/dejagnu-1-4-4/share/dejagnu/baseboards/unix.exp as  
board description file for target.
Using /usr/local/dejagnu-1-4-4/share/dejagnu/config/unix.exp as  
generic interface file for target.
Using /GCC/gcc-live-trunk/gcc/testsuite/config/default.exp as tool-and- 
target-specific interface file.
WARNING: libitm.c++/static_ctor.C compilation failed to produce  
executable


=== libitm Summary for unix/-m64 ===

# of expected passes23
# of expected failures  5

=== libitm Summary ===

# of expected passes46
# of expected failures  10



Index: libitm/config/darwin/powerpc/sjlj.S
===
--- libitm/config/darwin/powerpc/sjlj.S (revision 0)
+++ libitm/config/darwin/powerpc/sjlj.S (revision 0)
@@ -0,0 +1,345 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Iain Sandoe .
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Sof

Re: [RFC] Port libitm to powerpc

2011-12-02 Thread Richard Henderson
On 12/02/2011 05:37 AM, Iain Sandoe wrote:
> Richard: things that I did, intentionally, differently (and I'm not sure are 
> correct).
> 
> 1. I saved the CR
> 2. Once the vrs are saved, I update the VRsave mask to reflect that.

Don't update VRsave.  This gives the OS license to clobber those registers on 
task switch, which means that you'd need to reload all of those registers and 
restore VRsave after calling _GTM_begin_transaction.

> + mffs f0
> + stfd f31,-8(r1)

You didn't actually save FPSCRS, only copied it to f0.

> +#ifdef __ppc64__
> +  double fpscr;  /* ??? should we save this.  */
> +  unsigned int vscrpad;  
> +  unsigned int vscr; /* VRsave */
> +#else
> +  double fpscr;  /* ??? should we save this.  */
> +  unsigned int vscrpad[2];   
> +  unsigned int vscr; /* VRsave */
> +#endif

Why the difference?

> +  unsigned int cr;   /* Saved CR.  */
> +#ifdef __ppc64__
> +  unsigned int crpad;
> +#endif

Why not just make it unsigned long and be done with it?  There's nothing saying 
that you can't use lg/stg for the gp that holds the crs.  Nothing except for 
your corresponding longjmp cares about the layout.

I personally think the whole thing would be much easier to read without relying 
on the redzone.  Aside from that, there's actually very little real difference 
in the two files.  Essentially, you're storing the registers in a different 
order because the prologue does, just so you can make use of the redzone.  

The aix abi saves r2; darwin 32-bit saves r13.  One extra register in both 
cases, which could use the same slot.


r~


[pph] Re-factor merge reads for lang_decl (issue5440087)

2011-12-02 Thread Diego Novillo
This patch adds support for doing merge reads of lang_decl by 
processing PPH_RECORD_START_MERGE_BODY records.  I need it for
the next patch that will emit the merge keys for namespaces.


Tested on x86_64.


Diego.

* pph-in.c (pph_in_lang_decl_start): Rename from pph_in_ref_lang_decl.
Handle PPH_RECORD_START_MERGE_BODY records.
Add argument IS_MERGE_P.  Set it to true if the caller should do
a merge read.
Update all users.
(pph_in_lang_decl): Call pph_in_merge_ld_base when reading
a merge record.
(pph_in_merge_lang_decl): Remove.  Update all users.

diff --git a/gcc/cp/pph-in.c b/gcc/cp/pph-in.c
index ce6aeae..5474c5a 100644
--- a/gcc/cp/pph-in.c
+++ b/gcc/cp/pph-in.c
@@ -1522,16 +1522,18 @@ pph_in_ld_parm (pph_stream *stream, struct 
lang_decl_parm *ldp)
 }
 
 
-/* Read potential reference to a lang decl specific.  Return null when
-   either the read pointer is null, or it is already in the cache.
-   Otherwise, return the pointer to the lang decl specific.  */
+/* Read from STREAM the start of a lang_decl record for DECL.  If the
+   caller should do a merge-read, set *IS_MERGE_P to true.  Return
+   lang_decl structure associated with DECL.  If this function returns
+   NULL, it means that the lang_decl record has already been read and
+   nothing else needs to be done.  */
 
 static struct lang_decl *
-pph_in_ref_lang_decl (pph_stream *stream, tree decl)
+pph_in_lang_decl_start (pph_stream *stream, tree decl, bool *is_merge_p)
 {
-  struct lang_decl *ld;
   enum pph_record_marker marker;
   unsigned image_ix, ix;
+  struct lang_decl *ld;
 
   marker = pph_in_start_record (stream, &image_ix, &ix, PPH_lang_decl);
   if (marker == PPH_RECORD_END)
@@ -1543,22 +1545,40 @@ pph_in_ref_lang_decl (pph_stream *stream, tree decl)
 PPH_lang_decl);
   return NULL;
 }
+  else if (marker == PPH_RECORD_START_MERGE_BODY)
+{
+  /* If we are about to read the merge body for this lang_decl
+structure, the instance we found in the cache, must be the
+same one associated with DECL.  */
+  ld = (struct lang_decl *) pph_cache_get (&stream->cache, ix);
+  gcc_assert (ld == DECL_LANG_SPECIFIC (decl));
+  *is_merge_p = true;
+}
+  else
+{
+  gcc_assert (marker == PPH_RECORD_START);
 
-  /* Remove if we start emitting merge keys for this structure.  */
-  gcc_assert (marker == PPH_RECORD_START);
+  /* FIXME pph, we should not be getting a DECL_LANG_SPECIFIC
+instance here.  This is being allocated by
+pph_in_merge_key_namespace_decl, but this should be the only
+place where we allocate it.
 
-  /* Allocate a lang_decl structure for DECL, if not already present.
- Namespace merge keys preallocate it.  */
-  ld = DECL_LANG_SPECIFIC (decl);
-  if (!ld)
-{
-  retrofit_lang_decl (decl);
+   Change the if() below to:
+ gcc_assert (DECL_LANG_SPECIFIC (decl) == NULL);
+  */
+  if (DECL_LANG_SPECIFIC (decl) == NULL)
+   {
+ /* Allocate a lang_decl structure for DECL.  */
+ retrofit_lang_decl (decl);
+   }
   ld = DECL_LANG_SPECIFIC (decl);
+
+  /* Now register it.  We would normally use ALLOC_AND_REGISTER,
+but retrofit_lang_decl does not return a pointer.  */
+  pph_cache_insert_at (&stream->cache, ld, ix, PPH_lang_decl);
+  *is_merge_p = false;
 }
 
-  /* Now register it.  We would normally use ALLOC_AND_REGISTER,
- but retrofit_lang_decl does not return a pointer.  */
-  pph_cache_insert_at (&stream->cache, ld, ix, PPH_lang_decl);
   return ld;
 }
 
@@ -1570,57 +1590,19 @@ pph_in_lang_decl (pph_stream *stream, tree decl)
 {
   struct lang_decl *ld;
   struct lang_decl_base *ldb;
+  bool is_merge;
 
-  ld = pph_in_ref_lang_decl (stream, decl);
-  if (!ld)
+  ld = pph_in_lang_decl_start (stream, decl, &is_merge);
+  if (ld == NULL)
 return;
 
   /* Read all the fields in lang_decl_base.  */
   ldb = &ld->u.base;
-  pph_in_ld_base (stream, ldb);
-
-  if (ldb->selector == 0)
-{
-  /* Read all the fields in lang_decl_min.  */
-  pph_in_ld_min (stream, &ld->u.min);
-}
-  else if (ldb->selector == 1)
-{
-  /* Read all the fields in lang_decl_fn.  */
-  pph_in_ld_fn (stream, &ld->u.fn);
-}
-  else if (ldb->selector == 2)
-{
-  /* Read all the fields in lang_decl_ns.  */
-  pph_in_ld_ns (stream, &ld->u.ns);
-}
-  else if (ldb->selector == 3)
-{
-  /* Read all the fields in lang_decl_parm.  */
-  pph_in_ld_parm (stream, &ld->u.parm);
-}
+  if (is_merge)
+pph_in_merge_ld_base (stream, ldb);
   else
-gcc_unreachable ();
-}
-
+pph_in_ld_base (stream, ldb);
 
-/* Read and merge language specific data in DECL from STREAM.  */
-
-static void
-pph_in_merge_lang_decl (pph_stream *stream, tree decl)
-{
-  struct lang_decl *ld;
-  struct lang_decl_base *ldb;
-
-  ld = pph_in_ref_lang_decl (s

Re: FW: [PATCH][Cilkplus] Low cost annotations implementation

2011-12-02 Thread H.J. Lu
On Fri, Dec 2, 2011 at 2:35 PM, Iyer, Balaji V  wrote:
> Hello H. J.,
>   Here are the fixed patches. I have numbered their order from 1 through 4.
>

The ChangeLog entries should be added to the beginning of
ChangeLog, not in the middle.

-- 
H.J.


Re: [PATCH] Fix AVX2 mulv32qi expander (PR target/51387)

2011-12-02 Thread Richard Henderson
On 12/02/2011 11:18 AM, Jakub Jelinek wrote:
>   PR target/51387
>   * config/i386/sse.md (mul3 with VI1_AVX2 iterator): For
>   V32QImode use { 0,2,..,14,32,34,..,46,16,18,..,30,48,50,..,62 }
>   permutation instead of extract even permutation.

Ok.


r~


[Patch, Fortran] Fix MOVE_ALLOC check

2011-12-02 Thread Tobias Burnus
This patch fixes my previous MOVE_ALLOC patch. The standard states for
TO: "It shall be polymorphic if FROM is polymorphic."


I somehow read this bijectively, but it is actually allowed to have
a nonpolymorphic FROM with a polymorphic TO. Thanks to Damian for
finding this.


Built and regtested on x86-64-linux.
OK for the trunk?

Tobias

PS: Other pending patches:
- http://gcc.gnu.org/ml/fortran/2011-11/msg00249.html - Pointer 
INTENT(IN) check for MOVE_ALLOC [4.6/4.7 rejects-valid regression]
- http://gcc.gnu.org/ml/fortran/2011-11/msg00250.html - no 
-fcheck=bounds for character(LEN=:) to avoid ICE
- http://gcc.gnu.org/ml/fortran/2011-11/msg00253.html - (Re)enable 
warning if a function result variable is not set [4.4-4.7 diagnostics 
regression]
- http://gcc.gnu.org/ml/fortran/2011-11/msg00254.html - Thomas' 
dependency-ICE patch [4.6/4.7 regression]
- http://gcc.gnu.org/ml/fortran/2011-12/msg5.html - Fix 
component-access check
Note: select_type_23.f03 is actually invalid as "sm2", i.e.
the associate-name in SELECT TYPE, is not allocatable. See
PR fortran/48887 for details

2011-12-02  Tobias Burnus  

	* check.c (gfc_check_move_alloc): Allow nonpolymorphic
	FROM with polymorphic TO.
	* trans-intrinsic.c (conv_intrinsic_move_alloc): Handle
	nonpolymorphic FROM with polymorphic TO.

2011-12-02  Tobias Burnus  

	* gfortran.dg/select_type_23.f03: Revert Rev. 181801,
	i.e. remove the dg-error line.
	* gfortran.dg/move_alloc_5.f90: Ditto and change back
	to dg-do run.
	* gfortran.dg/move_alloc_9.f90: New.
	* gfortran.dg/move_alloc_10.f90: New.

diff --git a/gcc/fortran/check.c b/gcc/fortran/check.c
index c3f3cc2..94de31b 100644
--- a/gcc/fortran/check.c
+++ b/gcc/fortran/check.c
@@ -2702,17 +2702,17 @@ gfc_check_move_alloc (gfc_expr *from, gfc_expr *to)
   if (allocatable_check (to, 1) == FAILURE)
 return FAILURE;
 
-  if (same_type_check (to, 1, from, 0) == FAILURE)
-return FAILURE;
-
-  if (to->ts.type != from->ts.type)
+  if (from->ts.type == BT_CLASS && to->ts.type == BT_DERIVED)
 {
-  gfc_error ("The FROM and TO arguments in MOVE_ALLOC call at %L must be "
-		 "either both polymorphic or both nonpolymorphic",
+  gfc_error ("The TO arguments in MOVE_ALLOC at %L must be "
+		 "polymorphic if FROM is polymorphic",
 		 &from->where);
   return FAILURE;
 }
 
+  if (same_type_check (to, 1, from, 0) == FAILURE)
+return FAILURE;
+
   if (to->rank != from->rank)
 {
   gfc_error ("the '%s' and '%s' arguments of '%s' intrinsic at %L must "
@@ -2732,7 +2732,7 @@ gfc_check_move_alloc (gfc_expr *from, gfc_expr *to)
   return FAILURE;
 }
 
-  /* CLASS arguments: Make sure the vtab is present.  */
+  /* CLASS arguments: Make sure the vtab of from is present.  */
   if (to->ts.type == BT_CLASS)
 gfc_find_derived_vtab (from->ts.u.derived);
 
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 5da2c79..05bb095 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -7192,7 +7192,7 @@ conv_intrinsic_move_alloc (gfc_code *code)
 {
   stmtblock_t block;
   gfc_expr *from_expr, *to_expr;
-  gfc_expr *to_expr2, *from_expr2;
+  gfc_expr *to_expr2, *from_expr2 = NULL;
   gfc_se from_se, to_se;
   gfc_ss *from_ss, *to_ss;
   tree tmp;
@@ -7207,16 +7207,21 @@ conv_intrinsic_move_alloc (gfc_code *code)
 
   if (from_expr->rank == 0)
 {
+  gcc_assert (from_expr->ts.type != BT_CLASS
+		  || to_expr->ts.type == BT_CLASS);
   if (from_expr->ts.type != BT_CLASS)
+	from_expr2 = from_expr;
+  else
 	{
-	  from_expr2 = to_expr;
-	  to_expr2 = to_expr;
+	  from_expr2 = gfc_copy_expr (from_expr);
+	  gfc_add_data_component (from_expr2);
 	}
+
+  if (to_expr->ts.type != BT_CLASS)
+	to_expr2 = to_expr;
   else
 	{
 	  to_expr2 = gfc_copy_expr (to_expr);
-	  from_expr2 = gfc_copy_expr (from_expr);
-	  gfc_add_data_component (from_expr2);
 	  gfc_add_data_component (to_expr2);
 	}
 
@@ -7244,48 +7249,72 @@ conv_intrinsic_move_alloc (gfc_code *code)
   gfc_add_block_to_block (&block, &to_se.post);
 
   /* Set _vptr.  */
-  if (from_expr->ts.type == BT_CLASS)
+  if (to_expr->ts.type == BT_CLASS)
 	{
-	  gfc_free_expr (from_expr2);
-  gfc_free_expr (to_expr2);
-
-	  gfc_init_se (&from_se, NULL);
+	  gfc_free_expr (to_expr2);
 	  gfc_init_se (&to_se, NULL);
-	  from_se.want_pointer = 1;
 	  to_se.want_pointer = 1;
-	  gfc_add_vptr_component (from_expr);
 	  gfc_add_vptr_component (to_expr);
-
-	  gfc_conv_expr (&from_se, from_expr);
 	  gfc_conv_expr (&to_se, to_expr);
+
+	  if (from_expr->ts.type == BT_CLASS)
+	{
+	  gfc_free_expr (from_expr2);
+	  gfc_init_se (&from_se, NULL);
+	  from_se.want_pointer = 1;
+	  gfc_add_vptr_component (from_expr);
+	  gfc_conv_expr (&from_se, from_expr);
+	  tmp = from_se.expr;
+	}
+	  else
+	{
+	  gfc_symbol *vtab;
+	  vtab = gfc_find_derived_vtab (from_expr->ts.u.derived);
+	  gcc_assert (vtab);
+	 

[Patch, Fortran] PR 51378 Fix component-access check

2011-12-02 Thread Tobias Burnus
Found via Reinhold Bader's test suite: If a component is public, it 
remains public even if the extended type has PRIVATE.


Build and regtested on x86-64-linux.
OK for the trunk?

Tobias
2011-12-02  Tobias Burnus  

	PR fortran/51378
	* symbol.c (gfc_find_component): Fix access check of parent
	components.

2011-12-02  Tobias Burnus  

	PR fortran/51378
	* gfortran.dg/private_type_14.f90: New.

diff --git a/gcc/fortran/symbol.c b/gcc/fortran/symbol.c
index de42297..fcc1ccf 100644
--- a/gcc/fortran/symbol.c
+++ b/gcc/fortran/symbol.c
@@ -2022,6 +2022,21 @@ gfc_find_component (gfc_symbol *sym, const char *name,
 if (strcmp (p->name, name) == 0)
   break;
 
+  if (p && sym->attr.use_assoc && !noaccess)
+{
+  bool is_parent_comp = sym->attr.extension && (p == sym->components);
+  if (p->attr.access == ACCESS_PRIVATE ||
+	  (p->attr.access != ACCESS_PUBLIC
+	   && sym->component_access == ACCESS_PRIVATE
+	   && !is_parent_comp))
+	{
+	  if (!silent)
+	gfc_error ("Component '%s' at %C is a PRIVATE component of '%s'",
+		   name, sym->name);
+	  return NULL;
+	}
+}
+
   if (p == NULL
 	&& sym->attr.extension
 	&& sym->components->ts.type == BT_DERIVED)
@@ -2037,21 +2052,6 @@ gfc_find_component (gfc_symbol *sym, const char *name,
 gfc_error ("'%s' at %C is not a member of the '%s' structure",
 	   name, sym->name);
 
-  else if (sym->attr.use_assoc && !noaccess)
-{
-  bool is_parent_comp = sym->attr.extension && (p == sym->components);
-  if (p->attr.access == ACCESS_PRIVATE ||
-	  (p->attr.access != ACCESS_PUBLIC
-	   && sym->component_access == ACCESS_PRIVATE
-	   && !is_parent_comp))
-	{
-	  if (!silent)
-	gfc_error ("Component '%s' at %C is a PRIVATE component of '%s'",
-		   name, sym->name);
-	  return NULL;
-	}
-}
-
   return p;
 }
 
--- /dev/null	2011-12-02 08:02:36.367523993 +0100
+++ gcc/gcc/testsuite/gfortran.dg/private_type_14.f90	2011-12-02 09:31:05.0 +0100
@@ -0,0 +1,43 @@
+! { dg-do compile }
+!
+! PR fortran/51378
+!
+! Allow constructor to nonprivate parent compoents,
+! even if the extension specified PRIVATE for its own components
+!
+! Contributed by Reinhold Bader
+!
+module type_ext
+  type :: vec
+ real, dimension(3) :: comp
+ integer :: len
+  end type vec
+  type, extends(vec) :: l_vec
+ private
+ character(len=20) :: label = '01234567890123456789'
+  end type l_vec
+end module type_ext
+program test_ext
+  use type_ext
+  implicit none
+  type(vec) :: o_vec, oo_vec
+  type(l_vec) :: o_l_vec
+  integer :: i
+!
+  o_vec = vec((/1.0, 2.0, 3.0/),3)
+!  write(*,*) o_vec%comp, o_vec%len
+  o_l_vec = l_vec(comp=(/1.0, 2.0, 3.0/),len=3)
+! partial constr. not accepted by ifort 11.1, fixed in 12.0 (issue 562240)
+!  write(*,*) o_l_vec%comp, o_l_vec%len
+!  write(*,*) o_l_vec%vec
+  oo_vec = o_l_vec%vec
+  do i=1, 3
+if (abs(oo_vec%comp(i) - o_vec%comp(i)) > 1.0E-5) then
+   write(*, *) 'FAIL'
+   stop
+end if
+  end do
+  write(*, *) 'OK'
+end program
+
+! { dg-final { cleanup-modules "type_ext" } }


Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread William J. Schmidt


On Fri, 2011-12-02 at 13:46 -0700, Jeff Law wrote:
> On 12/02/11 12:27, William J. Schmidt wrote:
> 
> > 
> > Erm, wait.  How are PHIs in different blocks going to have the
> > same incoming edges?  (I was thinking of control dependence edges,
> > but these are just regular control flow incoming edges, right?)  So
> > this really isn't going to help.
> They're not.  But if we find an equivalence between phi_1 and phi_2,
> then we can replace every reference to phi_2 with phi_1.  This is safe
> because any reference to phi_2 must be dominated by the assignment to
> phi_2 which is in the same block as phi_1.
> 
> So while continuing to have the phis in the available expression table
> is not useful beyond the current block, the equivalency created when a
> redundant PHI is encountered is useful to keep.
> 
> I may have not made the distinction clearly in prior messages.  If
> that's what your patch does, then you're golden.

Ah, yes.  This is what I'm doing.  Sorry for the confusion!  And thanks
for the clarification.

Bill

> 
> Jeff
> 



Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread Jeff Law
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 12/02/11 12:27, William J. Schmidt wrote:

> 
> Erm, wait.  How are PHIs in different blocks going to have the
> same incoming edges?  (I was thinking of control dependence edges,
> but these are just regular control flow incoming edges, right?)  So
> this really isn't going to help.
They're not.  But if we find an equivalence between phi_1 and phi_2,
then we can replace every reference to phi_2 with phi_1.  This is safe
because any reference to phi_2 must be dominated by the assignment to
phi_2 which is in the same block as phi_1.

So while continuing to have the phis in the available expression table
is not useful beyond the current block, the equivalency created when a
redundant PHI is encountered is useful to keep.

I may have not made the distinction clearly in prior messages.  If
that's what your patch does, then you're golden.

Jeff

-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJO2TktAAoJEBRtltQi2kC7hUsH/iWomNPVNnGw00FhLVBuiIVC
c/Tsirdu256H07v1yZBemoh65EEOiqy6gHbKV2ZgL8MADJYm4i17Ii/0CEJyXHt2
YpplJmVY905WKqs2KN/qWHXAo7YFQwAj2MRWSksi2VMlq0YBHL+OA0qVlPLcNRUK
3e92nyERJAHgNlQqBLzGpeMLw8ozs8ognVZj9L/fbRWf4Jgnh5v5oPu50n9pWshO
ipq+J5Qbovli9c8lHq6etFZ3EVCCxahnZ4FF1rxI3mVOKnL90xFPprBB1jL6qcqx
8O79yTLK6M7u3CTmnD8KoociAMWOhoe34o8PQIYEtJC5Pops1jKViEIbsOQg9s0=
=NXbN
-END PGP SIGNATURE-


Re: FW: [PATCH][Cilkplus] Low cost annotations implementation

2011-12-02 Thread H.J. Lu
On Fri, Dec 2, 2011 at 12:24 PM, Iyer, Balaji V  wrote:
> Hello Everyone,
>    I found that patch 4 stepped on parts of patch 3. Here is the updated 
> patches (Please notice patch 4, 5 and 6 are replaced).
>
> Thanks,
>
> Balaji V. Iyer.
>
> 
> From: Iyer, Balaji V
> Sent: Wednesday, November 30, 2011 3:12 PM
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH][Cilkplus] Low cost annotations implementation
>
>  Hello Everyone,
>      The attached 7 patches are for the Cilkplus branch affecting both the C 
> and C++ compiler. These patches implement Low-Overhead Tool Annotations. 
> These annotations specifies two intrinsic that enable annotating executables 
> and tools without incurring significant run-time costs when the tools are not 
> in use. These annotations can be read by analyzers to gain insights about the 
> program. For example, Cilk screen and Cilk view use these annotations to 
> detect races and do scalability analysis on parallel programs. Even though 
> the annotations currently specified are Cilk Plus specific, the format is 
> generic and can be used by any tool-vendor to define additional annotations. 
> More information about these annotations can be found in the specification 
> available here: http://software.intel.com/file/39770. Cilk screen and Cilk 
> view are available in the Intel Cilk Plus SDK which can be downloaded for 
> free from the following Intel website: 
> http://software.intel.com/en-us/articles/intel-cilk-plus-software-development-kit/
>
>        The patches are numbered from 1 through 7, and please commit them in 
> order.
>

Please generate a new set of patches against
the current cilkplus branch.  The ChangeLog entries
should be added to the beginning of ChangeLog and
you shouldn't change existing ChangeLog entries. If
there is an error in existing ChangeLog entries, please
send a separate patch.


-- 
H.J.


[gcov] Distinguish exceptional lines

2011-12-02 Thread Nathan Sidwell
I've committed this patch that adds support to gcov to distinguish unexecuted 
exceptional lines.  Normally unexecuted lines are marked by '#'.  This patch 
augments that so that lines of code that are only reachable by an exceptional 
path (a catch clause in C++), are marked by '=' (half a # char) if they are 
unexecuted.


tested on i686-pc-linux-gnu.

Now to work on the fallout from working around bug 51113 ...
2011-12-02  Nathan Sidwell  

* gcov.c (struct arc_info): Add is_throw field.
(struct block_info): Add exceptional field, reduce flags size to
account for it.
(struct function_info): Add has_catch field.
(struct line_info): Add unexceptional field.
(process_file): Call find_exception_blocks if necessary.
(read_graph_file): Adjust.  Note if an exceptional edge is seen.
(find_exception_blocks): New.
(add_line_counts): Set line's unexceptional flag if not
exceptional.
(output_branch_count): Note exceptional arcs, lines and blocks.
* gcov-dump.c (tag_arcs): Decode arc flags.
* doc/gcov.texi: Document '=' lines.

testsuite/
* lib/gcov.exp (verify-lines): Allow = as a count char.
* g++.dg/gcov/gcov-13.C: New.

Index: doc/gcov.texi
===
--- doc/gcov.texi   (revision 181929)
+++ doc/gcov.texi   (working copy)
@@ -261,10 +261,13 @@ program source code.  The format is
 
 Additional block information may succeed each line, when requested by
 command line option.  The @var{execution_count} is @samp{-} for lines
-containing no code and @samp{#} for lines which were never executed.
-Some lines of information at the start have @var{line_number} of zero.
+containing no code.  Unexecuted lines are marked @samp{#} or
+@samp{=}, depending on whether they are reachable by
+non-exceptional paths or only exceptional paths such as C++ exception
+handlers, respectively.
 
-The preamble lines are of the form
+Some lines of information at the start have @var{line_number} of zero.
+These preamble lines are of the form
 
 @smallexample
 -:0:@var{tag}:@var{value}
Index: gcov.c
===
--- gcov.c  (revision 181929)
+++ gcov.c  (working copy)
@@ -88,6 +88,9 @@ typedef struct arc_info
   unsigned int fake : 1;
   unsigned int fall_through : 1;
 
+  /* Arc to a catch handler.  */
+  unsigned int is_throw : 1;
+
   /* Arc is for a function that abnormally returns.  */
   unsigned int is_call_non_return : 1;
 
@@ -123,10 +126,11 @@ typedef struct block_info
 
   /* Block execution count.  */
   gcov_type count;
-  unsigned flags : 13;
+  unsigned flags : 12;
   unsigned count_valid : 1;
   unsigned valid_chain : 1;
   unsigned invalid_chain : 1;
+  unsigned exceptional : 1;
 
   /* Block is a call instrumenting site.  */
   unsigned is_call_site : 1; /* Does the call.  */
@@ -172,6 +176,9 @@ typedef struct function_info
   unsigned lineno_checksum;
   unsigned cfg_checksum;
 
+  /* The graph contains at least one fake incoming edge.  */
+  unsigned has_catch : 1;
+
   /* Array of basic blocks.  */
   block_t *blocks;
   unsigned num_blocks;
@@ -224,6 +231,7 @@ typedef struct line_info
  in all-blocks mode.  */
   } u;
   unsigned exists : 1;
+  unsigned unexceptional : 1;
 } line_t;
 
 /* Describes a file mentioned in the block graph.  Contains an array
@@ -369,6 +377,7 @@ static unsigned find_source (const char
 static function_t *read_graph_file (void);
 static int read_count_file (function_t *);
 static void solve_flow_graph (function_t *);
+static void find_exception_blocks (function_t *);
 static void add_branch_counts (coverage_t *, const arc_t *);
 static void add_line_counts (coverage_t *, function_t *);
 static void function_summary (const coverage_t *, const char *);
@@ -628,6 +637,8 @@ process_file (const char *file_name)
sources[src].num_lines = line + 1;
  
  solve_flow_graph (fn);
+ if (fn->has_catch)
+   find_exception_blocks (fn);
  *fn_end = fn;
  fn_end = &fn->next;
}
@@ -1051,13 +1062,15 @@ read_graph_file (void)
{
  unsigned src = gcov_read_unsigned ();
  unsigned num_dests = GCOV_TAG_ARCS_NUM (length);
+ block_t *src_blk = &fn->blocks[src];
+ unsigned mark_catches = 0;
+ struct arc_info *arc;
 
  if (src >= fn->num_blocks || fn->blocks[src].succ)
goto corrupt;
 
  while (num_dests--)
{
- struct arc_info *arc;
  unsigned dest = gcov_read_unsigned ();
  unsigned flags = gcov_read_unsigned ();
 
@@ -1066,7 +1079,7 @@ read_graph_file (void)
  arc = XCNEW (arc_t);
 
  arc->dst = &fn->blocks[dest];
- arc->src = &fn->blocks[src];
+ arc->src = src_blk;
 
  a

Re: Adjust omp-low test for alignment

2011-12-02 Thread Hans-Peter Nilsson
On Fri, 2 Dec 2011, Mikael Pettersson wrote:
> but futexes must be 32-bit aligned (or at least not cross page
> boundaries).

Don't mix up futexes with hardware-mandated atomic alignment
(except that preferably the latter should not be more strict).

Linux futexes must be 32-bit aligned *for all architectures*.

Linux uses the two low bits for its own purposes, when it stores
futex addresses internally (or something to that effect; it was
a while ago I looked at this, around Feb 2009 alt. 2.6.27-ish).

brgds, H-P


Re: [Patch, i386] Limit unroll factor for certain loops on Corei7

2011-12-02 Thread Teresa Johnson
On Fri, Dec 2, 2011 at 11:36 AM, Andi Kleen  wrote:
> Teresa Johnson  writes:
>
> Interesting optimization. I would be concerned a little bit
> about compile time, does it make a measurable difference?

I haven't measured compile time explicitly, but I don't think it should,
especially after I address your efficiency suggestion (see below),
since it will just have one pass over the instructions in innermost
loops.

>
>> The attached patch detects loops containing instructions that tend to
>> incur high LCP (length changing prefix) stalls on Core i7, and limits
>> their unroll factor to try to keep the unrolled loop body small enough
>> to fit in the Corei7's loop stream detector which can hide LCP stalls
>> in loops.
>
> One more optimization would be to optimize padding for this case,
> the LSD only works if the loop is not spread over too many 32 byte
> chunks. So if you detect the loop is LSD worthy always pad to 32 bytes
> at the beginning.

Thanks for the suggestion, I will look at doing that in follow-on tuning.

>
>> To do this I leveraged the existing TARGET_LOOP_UNROLL_ADJUST target
>> hook, which was previously only defined for s390. I added one
>> additional call to this target hook, when unrolling for constant trip
>> count loops. Previously it was only called for runtime computed trip
>> counts. Andreas, can you comment on the effect for s390 of this
>> additional call of the target hook, since I can't measure that?
>
> On Sandy-Bridge there's also the decoded icache which is much larger,
> but also has some restrictions. It would be nice if this optimization
> was general enough to handle this case too.
>
> In general I notice that the tree loop unroller is too aggressive recently:
> a lot of loops that probably shouldn't be unrolled (like containing
> function calls etc.) are unrolled at -O3. So probably a better cost
> model for unrolling would make sense anyways.

These are both good suggestions, and I will look into Sandy Bridge
heuristics in follow-on work, since we will need to tune for that
soon.

>
>> +  /* Don't reduce unroll factor in loops with floating point
>> +     computation, which tend to benefit more heavily from
>> +     larger unroll factors and are less likely to bottleneck
>> +     at the decoder. */
>> +  has_FP = loop_has_FP_comp(loop);
>
> You could cache the loop body and pass it in here.

That is a great idea, and in fact, I think I will do away with this
separate function completely for this patch. I can more efficiently
look for the FP computation while I am looking for the half word
stores. I'll do that and send a follow up with the new patch.

>
> Patch looks reasonable to me, but I cannot approve.

Thanks!
Teresa

>
> -Andi
>
> --
> a...@linux.intel.com -- Speaking for myself only



-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread William J. Schmidt
OK, one more version.  This removes the basic block test and instead
implements Michael's suggestion:

On Fri, 2011-12-02 at 18:40 +0100, Michael Matz wrote:
> But I wonder why it's not enough to just do a push/pop sequence on 
> avail_exprs_stack around your new PHI processing in dom_opt_enter_block, 
> ala
> 
> +  VEC_safe_push (expr_hash_elt_t, heap, avail_exprs_stack, NULL);
>/* Create equivalences from redundant PHIs.  */
>for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>  eliminate_redundant_computations (&gsi);
> +  remove_local_expressions_from_table ();
> 
> on top of your current version.  That ought to remove the added PHI 
> expressions (and only them) from the hash table but retain the information 
> of equality in the const_or_copies_stack.  Checking the BB wouldn't be 
> required then.

Bootstrapped and regression tested on powerpc64-linux.

Thanks,
Bill


2011-12-02  Bill Schmidt  

PR middle-end/39976
* tree-ssa-dom.c (enum expr_kind): Add EXPR_PHI.
(struct hashable_expr): Add struct phi field.
(initialize_hash_element): Handle phis.
(hashable_expr_equal_p): Likewise.
(iterative_hash_hashable_expr): Likewise.
(print_expr_hash_elt): Likewise.
(dom_opt_enter_block): Create equivalences from redundant phis.
(eliminate_redundant_computations): Handle redundant phis.
(lookup_avail_expr): Handle phis.


Index: gcc/tree-ssa-dom.c
===
--- gcc/tree-ssa-dom.c  (revision 181929)
+++ gcc/tree-ssa-dom.c  (working copy)
@@ -52,7 +52,8 @@ enum expr_kind
   EXPR_UNARY,
   EXPR_BINARY,
   EXPR_TERNARY,
-  EXPR_CALL
+  EXPR_CALL,
+  EXPR_PHI
 };
 
 struct hashable_expr
@@ -65,6 +66,7 @@ struct hashable_expr
 struct { enum tree_code op;  tree opnd0, opnd1; } binary;
 struct { enum tree_code op;  tree opnd0, opnd1, opnd2; } ternary;
 struct { gimple fn_from; bool pure; size_t nargs; tree *args; } call;
+struct { size_t nargs; tree *args; } phi;
   } ops;
 };
 
@@ -281,6 +283,19 @@ initialize_hash_element (gimple stmt, tree lhs,
   expr->kind = EXPR_SINGLE;
   expr->ops.single.rhs = gimple_goto_dest (stmt);
 }
+  else if (code == GIMPLE_PHI)
+{
+  size_t nargs = gimple_phi_num_args (stmt);
+  size_t i;
+
+  expr->type = TREE_TYPE (gimple_phi_result (stmt));
+  expr->kind = EXPR_PHI;
+  expr->ops.phi.nargs = nargs;
+  expr->ops.phi.args = (tree *) xcalloc (nargs, sizeof (tree));
+
+  for (i = 0; i < nargs; i++)
+expr->ops.phi.args[i] = gimple_phi_arg_def (stmt, i);
+}
   else
 gcc_unreachable ();
 
@@ -439,6 +454,21 @@ hashable_expr_equal_p (const struct hashable_expr
 return true;
   }
 
+case EXPR_PHI:
+  {
+size_t i;
+
+if (expr0->ops.phi.nargs !=  expr1->ops.phi.nargs)
+  return false;
+
+for (i = 0; i < expr0->ops.phi.nargs; i++)
+  if (! operand_equal_p (expr0->ops.phi.args[i],
+ expr1->ops.phi.args[i], 0))
+return false;
+
+return true;
+  }
+
 default:
   gcc_unreachable ();
 }
@@ -516,6 +546,15 @@ iterative_hash_hashable_expr (const struct hashabl
   }
   break;
 
+case EXPR_PHI:
+  {
+size_t i;
+
+for (i = 0; i < expr->ops.phi.nargs; i++)
+  val = iterative_hash_expr (expr->ops.phi.args[i], val);
+  }
+  break;
+
 default:
   gcc_unreachable ();
 }
@@ -588,6 +627,22 @@ print_expr_hash_elt (FILE * stream, const struct e
   fprintf (stream, ")");
 }
 break;
+
+  case EXPR_PHI:
+{
+  size_t i;
+  size_t nargs = element->expr.ops.phi.nargs;
+
+  fprintf (stream, "PHI <");
+  for (i = 0; i < nargs; i++)
+{
+  print_generic_expr (stream, element->expr.ops.phi.args[i], 0);
+  if (i + 1 < nargs)
+fprintf (stream, ", ");
+}
+  fprintf (stream, ">");
+}
+break;
 }
   fprintf (stream, "\n");
 
@@ -1688,6 +1743,14 @@ dom_opt_enter_block (struct dom_walk_data *walk_da
   /* PHI nodes can create equivalences too.  */
   record_equivalences_from_phis (bb);
 
+  /* Create equivalences from redundant PHIs.  PHIs are only truly
+ redundant when they exist in the same block, so push another
+ marker and unwind right afterwards.  */
+  VEC_safe_push (expr_hash_elt_t, heap, avail_exprs_stack, NULL);
+  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+eliminate_redundant_computations (&gsi);
+  remove_local_expressions_from_table ();
+
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 optimize_stmt (bb, gsi);
 
@@ -1818,12 +1881,16 @@ eliminate_redundant_computations (gimple_stmt_iter
 {
   tree expr_type;
   tree cached_lhs;
+  tree def;
   bool insert = true;
   bool assigns_var_p

Re: [Patch, i386] Limit unroll factor for certain loops on Corei7

2011-12-02 Thread Xinliang David Li
On Fri, Dec 2, 2011 at 11:36 AM, Andi Kleen  wrote:
> Teresa Johnson  writes:
>
> Interesting optimization. I would be concerned a little bit
> about compile time, does it make a measurable difference?
>
>> The attached patch detects loops containing instructions that tend to
>> incur high LCP (length changing prefix) stalls on Core i7, and limits
>> their unroll factor to try to keep the unrolled loop body small enough
>> to fit in the Corei7's loop stream detector which can hide LCP stalls
>> in loops.
>
> One more optimization would be to optimize padding for this case,
> the LSD only works if the loop is not spread over too many 32 byte
> chunks. So if you detect the loop is LSD worthy always pad to 32 bytes
> at the beginning.
>
>> To do this I leveraged the existing TARGET_LOOP_UNROLL_ADJUST target
>> hook, which was previously only defined for s390. I added one
>> additional call to this target hook, when unrolling for constant trip
>> count loops. Previously it was only called for runtime computed trip
>> counts. Andreas, can you comment on the effect for s390 of this
>> additional call of the target hook, since I can't measure that?
>
> On Sandy-Bridge there's also the decoded icache which is much larger,
> but also has some restrictions. It would be nice if this optimization
> was general enough to handle this case too.
>
> In general I notice that the tree loop unroller is too aggressive recently:
> a lot of loops that probably shouldn't be unrolled (like containing
> function calls etc.) are unrolled at -O3. So probably a better cost
> model for unrolling would make sense anyways.
>

Yes, I believe there are many target specific unroll tunings that can
be done -- the current unroll target independent cost/benefit analysis
is too weak.

David



>> +  /* Don't reduce unroll factor in loops with floating point
>> +     computation, which tend to benefit more heavily from
>> +     larger unroll factors and are less likely to bottleneck
>> +     at the decoder. */
>> +  has_FP = loop_has_FP_comp(loop);
>
> You could cache the loop body and pass it in here.
>
> Patch looks reasonable to me, but I cannot approve.
>
> -Andi
>
> --
> a...@linux.intel.com -- Speaking for myself only


Re: [Patch, i386] Limit unroll factor for certain loops on Corei7

2011-12-02 Thread Xinliang David Li
;
>
> +/* Determine whether LOOP contains floating-point computation. */
> +bool
> +loop_has_FP_comp(struct loop *loop)
> +{
> +  rtx set, dest;

This probably should be extended to detect other long latency
operations in the future.


> +
> +  if (ix86_tune != PROCESSOR_COREI7_64 &&
> +      ix86_tune != PROCESSOR_COREI7_32)
> +    return nunroll;

Is it better to generalize it and model the LSD and LSD size in the
target model description? -- probably a different patch for that.


> +
> +  /* Look for instructions that store a constant into HImode (16-bit)
> +     memory. These require a length-changing prefix and on corei7 are
> +     prone to LCP stalls. These stalls can be avoided if the loop
> +     is streamed from the loop stream detector. */
> +  body = get_loop_body (loop);
> +  for (i = 0; i < loop->num_nodes && !found; i++)
> +    {
> +      bb = body[i];
> +
> +      FOR_BB_INSNS (bb, insn)
> +        {
> +          rtx set_expr;
> +          set_expr = single_set (insn);
> +          if (set_expr != NULL_RTX
> +              && GET_MODE (SET_DEST (set_expr)) == HImode
> +              && CONST_INT_P (SET_SRC (set_expr))
> +              && MEM_P (SET_DEST (set_expr)))
> +            {
> +              found = true;
> +              break;
> +            }
> +        }
> +    }
> +  free (body);


Probably generalize this to handle other long latency FE stalls -- for
now it only handles LCP stalls.

> +
> +  if (!found)
> +    return nunroll;
> +
> +  /* Don't reduce unroll factor in loops with floating point
> +     computation, which tend to benefit more heavily from
> +     larger unroll factors and are less likely to bottleneck
> +     at the decoder. */
> +  has_FP = loop_has_FP_comp(loop);
> +  if (has_FP)
> +    return nunroll;
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file,
> +               ";; Loop contains HImode store of const (possible LCP
> stalls),\n");
> +      fprintf (dump_file,
> +               "   reduce unroll factor to fit into Loop Stream Detector\n");
> +    }
> +
> +  /* On corei7 the loop stream detector can hold about 28 instructions, so
> +     don't allow unrolling to exceed that. */
> +  newunroll = 28 / loop->av_ninsns;

Is 28 the number of instructions or the number of uOps?

thanks,

David

> +  if (newunroll < nunroll)
> +    return newunroll;
> +
> +  return nunroll;
> +}
> +
>  /* Initialize the GCC target structure.  */
>  #undef TARGET_RETURN_IN_MEMORY
>  #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
> @@ -38685,6 +38755,9 @@ ix86_autovectorize_vector_sizes (void)
>  #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
>  #endif
>
> +#undef TARGET_LOOP_UNROLL_ADJUST
> +#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
>  ^L
>  #include "gt-i386.h"
>
> --
> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


RTEMS Go Patch

2011-12-02 Thread Joel Sherrill

Hi,

This addresses all of the Go compilation issues on the
head except one.

Ian.. Is this OK to commit? Or do you have suggestions
on how to make it more general?

Thanks.

2011-12-02  Joel Sherrill 

* runtime/go-signal.c: Add conditional on SIGPROF.
* runtime/mem_posix_memalign.c: Add USED directives.
* libgo/go/syscall/wait.c:  Conditionalize on WIFxxx macros
and SIGxxx being defined.

--
Joel Sherrill, Ph.D. Director of Research&  Development
joel.sherr...@oarcorp.comOn-Line Applications Research
Ask me about RTEMS: a free RTOS  Huntsville AL 35805
   Support Available (256) 722-9985


Index: libgo/runtime/go-signal.c
===
--- libgo/runtime/go-signal.c   (revision 181924)
+++ libgo/runtime/go-signal.c   (working copy)
@@ -122,12 +122,14 @@
   const char *msg;
   int i;
 
+#ifdef SIGPROF
   if (sig == SIGPROF)
 {
   /* FIXME.  */
   runtime_sigprof (0, 0, nil, nil);
   return;
 }
+#endif
 
   /* FIXME: Should check siginfo for more information when
  available.  */
@@ -257,6 +259,7 @@
 void
 runtime_resetcpuprofiler(int32 hz)
 {
+#ifdef SIGPROF
   struct itimerval it;
   struct sigaction sa;
   int i;
@@ -289,6 +292,7 @@
   i = setitimer (ITIMER_PROF, &it, NULL);
   __go_assert (i == 0);
 }
+#endif
 
   runtime_m()->profilehz = hz;
 }
Index: libgo/runtime/mem_posix_memalign.c
===
--- libgo/runtime/mem_posix_memalign.c  (revision 181924)
+++ libgo/runtime/mem_posix_memalign.c  (working copy)
@@ -36,10 +36,13 @@
 void*
 runtime_SysReserve(void *v, uintptr n)
 {
+   USED(v);
return runtime_SysAlloc(n);
 }
 
 void
 runtime_SysMap(void *v, uintptr n)
 {
+   USED(v);
+   USED(n);
 }
Index: libgo/go/syscall/wait.c
===
--- libgo/go/syscall/wait.c (revision 181924)
+++ libgo/go/syscall/wait.c (working copy)
@@ -12,6 +12,7 @@
 
 #include 
 #include 
+#include "runtime.h"
 
 extern _Bool Exited (uint32_t *w)
   __asm__ 
("libgo_syscall.syscall.Exited.N32_libgo_syscall.syscall.WaitStatus");
@@ -37,7 +38,12 @@
 _Bool
 Stopped (uint32_t *w)
 {
+#ifndef WIFSTOPPED
+  USED(w);
+  return 0;
+#else
   return WIFSTOPPED (*w) != 0;
+#endif
 }
 
 extern _Bool Continued (uint32_t *w)
@@ -46,7 +52,12 @@
 _Bool
 Continued (uint32_t *w)
 {
+#ifndef WIFCONTINUED
+  USED(w);
+  return 0;
+#else
   return WIFCONTINUED (*w) != 0;
+#endif
 }
 
 extern _Bool CoreDump (uint32_t *w)
@@ -55,7 +66,12 @@
 _Bool
 CoreDump (uint32_t *w)
 {
+#ifndef WCOREDUMP
+  USED(w);
+  return 0;
+#else
   return WCOREDUMP (*w) != 0;
+#endif
 }
 
 extern int ExitStatus (uint32_t *w)
@@ -95,9 +111,10 @@
   __asm__ 
("libgo_syscall.syscall.TrapCause.N32_libgo_syscall.syscall.WaitStatus");
 
 int
-TrapCause (uint32_t *w __attribute__ ((unused)))
+TrapCause (uint32_t *w)
 {
-#ifndef __linux__
+#if !(defined(WIFSTOPPED) && defined(WSTOPSIG) && defined(SIGTRAP))
+  USED(w);
   return -1;
 #else
   if (!WIFSTOPPED (*w) || WSTOPSIG (*w) != SIGTRAP)


Re: [google] Patch to enable efficient function level instrumentation (issue 5416043)

2011-12-02 Thread davidxl


http://codereview.appspot.com/5416043/diff/6001/gcc/config/i386/i386.c
File gcc/config/i386/i386.c (right):

http://codereview.appspot.com/5416043/diff/6001/gcc/config/i386/i386.c#newcode10881
gcc/config/i386/i386.c:10881: +   '_function_patch_epilogue'. The
backpointer section can be used to navigate
Is it strictly necessary? If most of the functions are instrumented, can
the runtime instrumenter just recognize functions that need patching via
pattern matching -- at least for the function prologue?

http://codereview.appspot.com/5416043/


Re: [Patch, i386] Limit unroll factor for certain loops on Corei7

2011-12-02 Thread Andi Kleen
Teresa Johnson  writes:

Interesting optimization. I would be concerned a little bit
about compile time, does it make a measurable difference?

> The attached patch detects loops containing instructions that tend to
> incur high LCP (length changing prefix) stalls on Core i7, and limits
> their unroll factor to try to keep the unrolled loop body small enough
> to fit in the Corei7's loop stream detector which can hide LCP stalls
> in loops.

One more optimization would be to optimize padding for this case,
the LSD only works if the loop is not spread over too many 32 byte
chunks. So if you detect the loop is LSD worthy always pad to 32 bytes
at the beginning.

> To do this I leveraged the existing TARGET_LOOP_UNROLL_ADJUST target
> hook, which was previously only defined for s390. I added one
> additional call to this target hook, when unrolling for constant trip
> count loops. Previously it was only called for runtime computed trip
> counts. Andreas, can you comment on the effect for s390 of this
> additional call of the target hook, since I can't measure that?

On Sandy-Bridge there's also the decoded icache which is much larger,
but also has some restrictions. It would be nice if this optimization
was general enough to handle this case too.

In general I notice that the tree loop unroller is too aggressive recently:
a lot of loops that probably shouldn't be unrolled (like containing
function calls etc.) are unrolled at -O3. So probably a better cost
model for unrolling would make sense anyways.

> +  /* Don't reduce unroll factor in loops with floating point
> + computation, which tend to benefit more heavily from
> + larger unroll factors and are less likely to bottleneck
> + at the decoder. */
> +  has_FP = loop_has_FP_comp(loop);

You could cache the loop body and pass it in here.

Patch looks reasonable to me, but I cannot approve.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only


libgo patch committed: Update to weekly.2011-11-01

2011-12-02 Thread Ian Lance Taylor
This patch updates the Go library to the weekly.2011-11-01 release (I
know that was a month ago).  This patch includes a change to the
reflection string for the empty interface type to match a similar change
to the other Go compiler; the change to gccgo was by Rémy Oudompheng.

The patch is too large to include here, so I have just included the
changes to the files which are not simply copies of the master Go
library.  The other changes may be seen in the revision repository or in
the master Go library.

Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline.

Ian

diff -r f08c4c3f8366 go/types.cc
--- a/go/types.cc	Thu Dec 01 15:50:29 2011 -0800
+++ b/go/types.cc	Fri Dec 02 11:15:59 2011 -0800
@@ -5876,13 +5876,13 @@
   ret->append("interface {");
   if (this->methods_ != NULL)
 {
+  ret->push_back(' ');
   for (Typed_identifier_list::const_iterator p = this->methods_->begin();
 	   p != this->methods_->end();
 	   ++p)
 	{
 	  if (p != this->methods_->begin())
-	ret->append(";");
-	  ret->push_back(' ');
+	ret->append("; ");
 	  if (!Gogo::is_hidden_name(p->name()))
 	ret->append(p->name());
 	  else
@@ -5898,8 +5898,9 @@
 	  sub = sub.substr(4);
 	  ret->append(sub);
 	}
-}
-  ret->append(" }");
+  ret->push_back(' ');
+}
+  ret->append("}");
 }
 
 // Mangled name.
diff -r f08c4c3f8366 libgo/configure.ac
--- a/libgo/configure.ac	Thu Dec 01 15:50:29 2011 -0800
+++ b/libgo/configure.ac	Fri Dec 02 11:15:59 2011 -0800
@@ -452,7 +452,7 @@
 
 AM_CONDITIONAL(HAVE_SYS_MMAN_H, test "$ac_cv_header_sys_mman_h" = yes)
 
-AC_CHECK_FUNCS(srandom random strerror_r strsignal wait4 mincore setenv)
+AC_CHECK_FUNCS(strerror_r strsignal wait4 mincore setenv)
 AM_CONDITIONAL(HAVE_STRERROR_R, test "$ac_cv_func_strerror_r" = yes)
 AM_CONDITIONAL(HAVE_WAIT4, test "$ac_cv_func_wait4" = yes)
 
diff -r f08c4c3f8366 libgo/runtime/go-main.c
--- a/libgo/runtime/go-main.c	Thu Dec 01 15:50:29 2011 -0800
+++ b/libgo/runtime/go-main.c	Fri Dec 02 11:15:59 2011 -0800
@@ -32,10 +32,7 @@
 
 extern char **environ;
 
-/* These functions are created for the main package.  */
-extern void __go_init_main (void);
-extern void real_main (void) asm ("main.main");
-
+extern void runtime_main (void);
 static void mainstart (void *);
 
 /* The main function.  */
@@ -47,13 +44,6 @@
   runtime_args (argc, (byte **) argv);
   runtime_osinit ();
   runtime_schedinit ();
-
-#if defined(HAVE_SRANDOM)
-  srandom ((unsigned int) time (NULL));
-#else
-  srand ((unsigned int) time (NULL));
-#endif
-
   __go_go (mainstart, NULL);
   runtime_mstart (runtime_m ());
   abort ();
@@ -62,13 +52,5 @@
 static void
 mainstart (void *arg __attribute__ ((unused)))
 {
-  __go_init_main ();
-
-  mstats.enablegc = 1;
-
-  real_main ();
-
-  runtime_exit (0);
-
-  abort ();
+  runtime_main ();
 }
diff -r f08c4c3f8366 libgo/runtime/proc.c
--- a/libgo/runtime/proc.c	Thu Dec 01 15:50:29 2011 -0800
+++ b/libgo/runtime/proc.c	Fri Dec 02 11:15:59 2011 -0800
@@ -128,6 +128,9 @@
 	volatile uint32 atomic;	// atomic scheduling word (see below)
 
 	int32 profilehz;	// cpu profiling rate
+	
+	bool init;  // running initialization
+	bool lockmain;  // init called runtime.LockOSThread
 
 	Note	stopped;	// one g can set waitstop and wait here for m's to stop
 };
@@ -292,11 +295,7 @@
 //	make & queue new G
 //	call runtime_mstart
 //
-// The new G does:
-//
-//	call main_init_function
-//	call initdone
-//	call main_main
+// The new G calls runtime_main.
 void
 runtime_schedinit(void)
 {
@@ -340,6 +339,37 @@
 	m->nomemprof--;
 }
 
+extern void main_init(void) __asm__ ("__go_init_main");
+extern void main_main(void) __asm__ ("main.main");
+
+// The main goroutine.
+void
+runtime_main(void)
+{
+	// Lock the main goroutine onto this, the main OS thread,
+	// during initialization.  Most programs won't care, but a few
+	// do require certain calls to be made by the main thread.
+	// Those can arrange for main.main to run in the main thread
+	// by calling runtime.LockOSThread during initialization
+	// to preserve the lock.
+	runtime_LockOSThread();
+	runtime_sched.init = true;
+	main_init();
+	runtime_sched.init = false;
+	if(!runtime_sched.lockmain)
+		runtime_UnlockOSThread();
+
+	// For gccgo we have to wait until after main is initialized
+	// to enable GC, because initializing main registers the GC
+	// roots.
+	mstats.enablegc = 1;
+
+	main_main();
+	runtime_exit(0);
+	for(;;)
+		*(int32*)0 = 0;
+}
+
 // Lock the scheduler.
 static void
 schedlock(void)
@@ -1233,16 +1263,6 @@
 	runtime_gosched();
 }
 
-void runtime_LockOSThread (void)
-  __asm__ ("libgo_runtime.runtime.LockOSThread");
-
-void
-runtime_LockOSThread(void)
-{
-	m->lockedg = g;
-	g->lockedm = m;
-}
-
 // delete when scheduler is stronger
 int32
 runtime_gomaxprocsfunc(int32 n)
@@ -1282,12 +1302,24 @@
 	return ret;
 }
 
-void runtime_UnlockOSThread (void)
-  __asm__ ("libgo_runtime.runtime.UnlockOSThread");
+void
+runtime_LockOSThread(void)
+{
+	if(m == &runtime_

[PATCH] Improve debug info if tree DCE removes stores (PR debug/50317, fallout)

2011-12-02 Thread Jakub Jelinek
On Fri, Dec 02, 2011 at 02:27:40PM +0100, Richard Guenther wrote:
> This change seems wrong.  We are turning valid gimple
> 
> # DEBUG D#2 => transfer.0  [with address taken]
> 
> into invalid one
> 
> # DEBUG D#2 => transfer.0  [without address taken]
> 
> once you update that stmt with update_stmt you'll get an SSA operand
> for transfer.0 which is not in SSA form because you fail to rewrite it
> into.
> 
> Why do this in remove_unused_locals and not in update_address_taken?
> Or, why do it at all?
> 
> I have a SSA operand checking patch that catches this now ...

Here is a fix for that.  Instead of clearing TREE_ADDRESSABLE for
unreferenced vars we allow them in target_for_debug_bind if they aren't
referenced vars (thus we don't risk mixing VALUE tracking with the
old style REG_EXPR/MEM_EXPR tracking of these variables).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-12-02  Jakub Jelinek  

PR debug/50317
* tree-ssa.c (target_for_debug_bind): Also allow is_gimple_reg_type
vars that aren't referenced.
* tree-ssa-live.c (remove_unused_locals): Don't clear TREE_ADDRESSABLE
of unreferenced local vars.
* cfgexpand.c (expand_debug_expr): For DEBUG_IMPLICIT_PTR allow also
TREE_ADDRESSABLE vars that satisfy target_for_debug_bind.

--- gcc/tree-ssa.c.jj   2011-11-29 08:58:52.0 +0100
+++ gcc/tree-ssa.c  2011-12-02 15:04:03.494148642 +0100
@@ -264,7 +264,12 @@ target_for_debug_bind (tree var)
 return NULL_TREE;
 
   if (!is_gimple_reg (var))
-return NULL_TREE;
+{
+  if (is_gimple_reg_type (TREE_TYPE (var))
+ && referenced_var_lookup (cfun, DECL_UID (var)) == NULL_TREE)
+   return var;
+  return NULL_TREE;
+}
 
   return var;
 }
--- gcc/tree-ssa-live.c.jj  2011-12-02 01:52:27.0 +0100
+++ gcc/tree-ssa-live.c 2011-12-02 15:04:59.601816335 +0100
@@ -814,15 +814,7 @@ remove_unused_locals (void)
  bitmap_set_bit (global_unused_vars, DECL_UID (var));
}
  else
-   {
- /* For unreferenced local vars drop TREE_ADDRESSABLE
-bit in case it is referenced from debug stmts.  */
- if (DECL_CONTEXT (var) == current_function_decl
- && TREE_ADDRESSABLE (var)
- && is_gimple_reg_type (TREE_TYPE (var)))
-   TREE_ADDRESSABLE (var) = 0;
- continue;
-   }
+   continue;
}
   else if (TREE_CODE (var) == VAR_DECL
   && DECL_HARD_REGISTER (var)
--- gcc/cfgexpand.c.jj  2011-12-02 01:52:27.0 +0100
+++ gcc/cfgexpand.c 2011-12-02 15:24:37.982327507 +0100
@@ -3325,7 +3325,8 @@ expand_debug_expr (tree exp)
  if ((TREE_CODE (TREE_OPERAND (exp, 0)) == VAR_DECL
   || TREE_CODE (TREE_OPERAND (exp, 0)) == PARM_DECL
   || TREE_CODE (TREE_OPERAND (exp, 0)) == RESULT_DECL)
- && !TREE_ADDRESSABLE (TREE_OPERAND (exp, 0)))
+ && (!TREE_ADDRESSABLE (TREE_OPERAND (exp, 0))
+ || target_for_debug_bind (TREE_OPERAND (exp, 0
return gen_rtx_DEBUG_IMPLICIT_PTR (mode, TREE_OPERAND (exp, 0));
 
  if (handled_component_p (TREE_OPERAND (exp, 0)))
@@ -3337,7 +3338,8 @@ expand_debug_expr (tree exp)
  if ((TREE_CODE (decl) == VAR_DECL
   || TREE_CODE (decl) == PARM_DECL
   || TREE_CODE (decl) == RESULT_DECL)
- && !TREE_ADDRESSABLE (decl)
+ && (!TREE_ADDRESSABLE (decl)
+ || target_for_debug_bind (decl))
  && (bitoffset % BITS_PER_UNIT) == 0
  && bitsize > 0
  && bitsize == maxsize)


Jakub


Go patch committed: Clean up merge.sh

2011-12-02 Thread Ian Lance Taylor
This patch to the libgo script merge.sh does the following:

* Permit specifying a revision on the command line.

* Add various new C files to the merge, reflecting their addition to the
  library.

* Automatically convert the Unicode character · used in the master
  library C code to _ before merging, avoiding a lot of pointless
  conflicts.

Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline.

Ian

diff -r f08c4c3f8366 libgo/merge.sh
--- a/libgo/merge.sh	Thu Dec 01 15:50:29 2011 -0800
+++ b/libgo/merge.sh	Fri Dec 02 11:24:12 2011 -0800
@@ -25,10 +25,15 @@
   exit 1
 fi
 
-if test $# -ne 1; then
-  echo 1>&2 "merge.sh: Usage: merge.sh mercurial-repository"
+rev=weekly
+case $# in
+1) ;;
+2) rev=$2 ;;
+*)
+  echo 1>&2 "merge.sh: Usage: merge.sh mercurial-repository [revision]"
   exit 1
-fi
+  ;;
+esac
 
 repository=$1
 
@@ -38,9 +43,9 @@
 hg clone -r ${old_rev} ${repository} ${OLDDIR}
 
 rm -rf ${NEWDIR}
-hg clone -u weekly ${repository} ${NEWDIR}
+hg clone -u ${rev} ${repository} ${NEWDIR}
 
-new_rev=`cd ${NEWDIR} && hg log -r weekly | sed 1q | sed -e 's/.*://'`
+new_rev=`cd ${NEWDIR} && hg log -r ${rev} | sed 1q | sed -e 's/.*://'`
 
 merge() {
   name=$1
@@ -146,12 +151,33 @@
   done
 done
 
-runtime="goc2c.c mcache.c mcentral.c mfinal.c mfixalloc.c mgc0.c mheap.c msize.c malloc.h malloc.goc mprof.goc"
+runtime="chan.c cpuprof.c goc2c.c lock_futex.c lock_sema.c mcache.c mcentral.c mfinal.c mfixalloc.c mgc0.c mheap.c msize.c proc.c runtime.c runtime.h malloc.h malloc.goc mprof.goc runtime1.goc sema.goc sigqueue.goc string.goc"
 for f in $runtime; do
   oldfile=${OLDDIR}/src/pkg/runtime/$f
-  newfile=${NEWDIR}/src/pkg/runtime/$f
-  libgofile=runtime/$f
-  merge $f ${oldfile} ${newfile} ${libgofile}
+  if test -f ${oldfile}; then
+sed -e 's/·/_/g' < ${oldfile} > ${oldfile}.tmp
+oldfile=${oldfile}.tmp
+newfile=${NEWDIR}/src/pkg/runtime/$f
+sed -e 's/·/_/g' < ${newfile} > ${newfile}.tmp
+newfile=${newfile}.tmp
+libgofile=runtime/$f
+merge $f ${oldfile} ${newfile} ${libgofile}
+  fi
+done
+
+runtime2="linux/thread.c thread-linux.c linux/mem.c mem.c"
+echo $runtime2 | while read from; do
+  read to
+  oldfile=${OLDDIR}/src/pkg/runtime/$from
+  if test -f ${oldfile}; then
+sed -e 's/·/_/g' < ${oldfile} > ${oldfile}.tmp
+oldfile=${oldfile}.tmp
+newfile=${NEWDIR}/src/pkg/runtime/$from
+sed -e 's/·/_/g' < ${newfile} > ${newfile}.tmp
+newfile=${newfile}.tmp
+libgofile=runtime/$to
+merge $f ${oldfile} ${newfile} ${libgofile}
+  fi
 done
 
 (cd ${OLDDIR}/src/pkg && find . -name '*.go' -print) | while read f; do


Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread William J. Schmidt
On Fri, 2011-12-02 at 13:08 -0600, William J. Schmidt wrote:
> 
> On Fri, 2011-12-02 at 11:28 -0700, Jeff Law wrote:
> > On 12/02/11 11:10, William J. Schmidt wrote:
> > > 
> > >> 
> > >> I see a mention of creating equivalences for redundant PHIs?  Are
> > >> we just trying to determine that two PHIs are going to result in
> > >> the same value?
> > > 
> > > Jeff, see comment #37 in 
> > > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39976.  The issue is
> > > that we have two PHIs in the same block as follows:
> > > 
> > > # prephitmp.35_322 = PHI  
> > > # prephitmp.35_225 = PHI 
> > OK.  Make sure you hash the phi argument & its edge...  Similarly the
> > equality comparison needs to check args & edges.  Or s/edge/edge->src/
> > if that's easier.
> > 
> 
> OK, this is what I was missing.  Including edge->src->index in the hash
> and equality test to get the actual origin of the arguments should allow
> the PHIs to match when they're truly equivalent.  Thanks!

Erm, wait.  How are PHIs in different blocks going to have the same
incoming edges?  (I was thinking of control dependence edges, but these
are just regular control flow incoming edges, right?)  So this really
isn't going to help.

> 
> > > 
> > > The coalescing algorithm in tree-outof-ssa doesn't handle this well
> > > and ends up splitting the back arc of a simple loop as a result.
> > > I'm creating an equivalence between prephitmp.35_322 and
> > > prephitmp.35_225 in this case so that one PHI goes dead and is
> > > removed.
> > Right.  I don't think I bothered with this because I didn't see it
> > happening and the comparison of the PHIs with each other gets
> > potentially expensive.
> >
> >  As pointed out in
> > > the comments, this needs to be restricted to occur only in the
> > > same block since the PHIs aren't necessarily equivalent otherwise.
> > I must be missing something -- why is the equivalence only valid in
> > the same block?
> > 
> > Conceptually this situation isn't any different replacing the second
> > PHI with a copy
> > blah_225 = blah_322
> > 
> > And the equivalence is valid for the entire dominator subtree.

If you go back a couple of posts in this thread, I have an example of
control flow where two control-equivalent blocks contain PHIs with
identical constant arguments.  But the constants are "defined" in
different blocks with different control dependence, so while the
constants are equivalent, the PHIs aren't.  I originally replaced this
with a copy as suggested, which broke things.

> 
> Yes, once the origin of the PHI arguments is correctly taken into
> account, then equivalent PHIs can be eliminated within the subtree.

...but none will be found...

> 
> > 
> > If you really need to create a temporary equiv, pushing the marker,
> > create the equiv and pop the marker when equiv dies is simple and easy.
> >

Yes, this still seems like the most promising thing to do.

> > Jeff
> 
> Much obliged,
> Bill



[PATCH] Fold constant argument VEC_{PACK_{,FIX_}TRUNC,{UNPACK{,_FLOAT},WIDEN_MULT}_{LO,HI}}_EXPR

2011-12-02 Thread Jakub Jelinek
Hi!

As I found during investigation of PR51387, e.g. on the attached testcase
we generate pretty bad code (for f1 even with bigger N like 256 for avx2),
because after vectorization cunroll unrolls the loops completely and we
end up with lots of VEC_PACK_TRUNC_EXPR etc. expressions with VECTOR_CST
arguments.  We don't fold them, thus we read lots of constants from memory
and reshuffle them in lots of code.  This patch adds folding for these
expressions, we end up on this testcase with the same amount of loaded
constants from memory, but no need to reshuffle it.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-12-02  Jakub Jelinek  

* fold-const.c (fold_unary_loc): Fold VEC_UNPACK_LO_EXPR,
VEC_UNPACK_HI_EXPR, VEC_UNPACK_FLOAT_LO_EXPR and
VEC_UNPACK_FLOAT_HI_EXPR with VECTOR_CST argument.
(fold_binary_loc): Fold VEC_PACK_TRUNC_EXPR,
VEC_PACK_FIX_TRUNC_EXPR, VEC_WIDEN_MULT_LO_EXPR
and VEC_WIDEN_MULT_HI_EXPR with VECTOR_CST arguments.

* gcc.dg/vect/vect-122.c: New test.

--- gcc/fold-const.c.jj 2011-12-02 01:52:26.0 +0100
+++ gcc/fold-const.c2011-12-02 17:43:09.246557524 +0100
@@ -7651,6 +7651,8 @@ build_fold_addr_expr_loc (location_t loc
   return build_fold_addr_expr_with_type_loc (loc, t, ptrtype);
 }
 
+static bool vec_cst_ctor_to_array (tree, tree *);
+
 /* Fold a unary expression of code CODE and type TYPE with operand
OP0.  Return the folded expression if folding is successful.
Otherwise, return NULL_TREE.  */
@@ -8294,6 +8296,44 @@ fold_unary_loc (location_t loc, enum tre
}
   return NULL_TREE;
 
+case VEC_UNPACK_LO_EXPR:
+case VEC_UNPACK_HI_EXPR:
+case VEC_UNPACK_FLOAT_LO_EXPR:
+case VEC_UNPACK_FLOAT_HI_EXPR:
+  {
+   unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+   tree *elts, vals = NULL_TREE;
+   enum tree_code subcode;
+
+   gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts * 2);
+   if (TREE_CODE (arg0) != VECTOR_CST)
+ return NULL_TREE;
+
+   elts = XALLOCAVEC (tree, nelts * 2);
+   if (!vec_cst_ctor_to_array (arg0, elts))
+ return NULL_TREE;
+
+   if ((!BYTES_BIG_ENDIAN) ^ (code == VEC_UNPACK_LO_EXPR
+  || code == VEC_UNPACK_FLOAT_LO_EXPR))
+ elts += nelts;
+
+   if (code == VEC_UNPACK_LO_EXPR || code == VEC_UNPACK_HI_EXPR)
+ subcode = NOP_EXPR;
+   else
+ subcode = FLOAT_EXPR;
+
+   for (i = 0; i < nelts; i++)
+ {
+   elts[i] = fold_convert_const (subcode, TREE_TYPE (type), elts[i]);
+   if (elts[i] == NULL_TREE || !CONSTANT_CLASS_P (elts[i]))
+ return NULL_TREE;
+ }
+
+   for (i = 0; i < nelts; i++)
+ vals = tree_cons (NULL_TREE, elts[nelts - i - 1], vals);
+   return build_vector (type, vals);
+  }
+
 default:
   return NULL_TREE;
 } /* switch (code) */
@@ -13498,6 +13538,73 @@ fold_binary_loc (location_t loc,
}
   return NULL_TREE;
 
+case VEC_PACK_TRUNC_EXPR:
+case VEC_PACK_FIX_TRUNC_EXPR:
+  {
+   unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+   tree *elts, vals = NULL_TREE;
+
+   gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts / 2
+   && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts / 2);
+   if (TREE_CODE (arg0) != VECTOR_CST || TREE_CODE (arg1) != VECTOR_CST)
+ return NULL_TREE;
+
+   elts = XALLOCAVEC (tree, nelts);
+   if (!vec_cst_ctor_to_array (arg0, elts)
+   || !vec_cst_ctor_to_array (arg1, elts + nelts / 2))
+ return NULL_TREE;
+
+   for (i = 0; i < nelts; i++)
+ {
+   elts[i] = fold_convert_const (code == VEC_PACK_TRUNC_EXPR
+ ? NOP_EXPR : FIX_TRUNC_EXPR,
+ TREE_TYPE (type), elts[i]);
+   if (elts[i] == NULL_TREE || !CONSTANT_CLASS_P (elts[i]))
+ return NULL_TREE;
+ }
+
+   for (i = 0; i < nelts; i++)
+ vals = tree_cons (NULL_TREE, elts[nelts - i - 1], vals);
+   return build_vector (type, vals);
+  }
+
+case VEC_WIDEN_MULT_LO_EXPR:
+case VEC_WIDEN_MULT_HI_EXPR:
+  {
+   unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+   tree *elts, vals = NULL_TREE;
+
+   gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts * 2
+   && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts * 2);
+   if (TREE_CODE (arg0) != VECTOR_CST || TREE_CODE (arg1) != VECTOR_CST)
+ return NULL_TREE;
+
+   elts = XALLOCAVEC (tree, nelts * 4);
+   if (!vec_cst_ctor_to_array (arg0, elts)
+   || !vec_cst_ctor_to_array (arg1, elts + nelts * 2))
+ return NULL_TREE;
+
+   if ((!BYTES_BIG_ENDIAN) ^ (code == VEC_WIDEN_MULT_LO_EXPR))
+ elts += nelts;
+
+   for (i = 0; i < nelts; i++)
+ {
+   elts[i] = fold

Re: [Patch.AVR,4.6] Fix PR51002

2011-12-02 Thread Georg-Johann Lay
Denis Chertykov wrote:
> 2011/11/29 Georg-Johann Lay:
>> For devices with 8-bit SP reading the high byte SP_H of SP will get garbage.
>>
>> The patch uses CLR instead of IN SP_H to "read" the high part of SP.
>>
>> There are two issues with this patch:
>>
>> == 1 ==
>>
>> I cannot really test it because for devices that small running test suite
>> does not give usable results.  So I just looked at the patch and the
>> small test case like the following compiled
>>
>> $ avr-gcc-4.6.2 -Os -mmcu=attiny26 -m[no]call-prologues
>>
>> long long a, b;
>>
>> long long __attribute__((noinline,noclione))
>> bar (char volatile *c)
>> {
>>*c = 1;
>>return a+b;
>> }
>>
>> long long foo()
>> {
>>char buf[16];
>>return bar (buf);
>> }
>>
>>
>> int main (void)
>> {
>>return foo();
>> }
>>
>>
>> The C parts look fine but...
>>
>>
>> == 2 ==
>>
>> The libgcc parts will still read garbage to R29 as explained in the
>> FIXMEs there.
>>
>> Solving the FIXMEs can only be achieved by splitting multilibs avr2 and 
>> avr25,
>> i.e. the multilibs that mix devices with/without SP.H, into avr2, avr21, 
>> avr24,
>> avr25, say.
>>
>> I don't think it's a good idea to have real 8-bit SP/FP and that it would 
>> cause
>> all sorts of trouble.
> 
> I agree.
> 
>> Ok to commit to 4.6?
> 
> Approved.

http://gcc.gnu.org/viewcvs?view=revision&revision=181936

Installed as gcc-4_6-branch r181936 with the following change:


Index: config/avr/avr-devices.c
===
--- config/avr/avr-devices.c(revision 181783)
+++ config/avr/avr-devices.c(working copy)
@@ -70,7 +70,7 @@ const struct mcu_type_s avr_mcu_types[]
   { "attiny2313a",  ARCH_AVR25, "__AVR_ATtiny2313A__",  1, 0x0060,
"tn2313a" },
   { "attiny24", ARCH_AVR25, "__AVR_ATtiny24__", 1, 0x0060,
"tn24" },
   { "attiny24a",ARCH_AVR25, "__AVR_ATtiny24A__",1, 0x0060,
"tn24a" },
-  { "attiny4313",   ARCH_AVR25, "__AVR_ATtiny4313__",   1, 0x0060,
"tn4313" },
+  { "attiny4313",   ARCH_AVR25, "__AVR_ATtiny4313__",   0, 0x0060,
"tn4313" },
   { "attiny44", ARCH_AVR25, "__AVR_ATtiny44__", 0, 0x0060,
"tn44" },
   { "attiny44a",ARCH_AVR25, "__AVR_ATtiny44A__",0, 0x0060,
"tn44a" },
   { "attiny84", ARCH_AVR25, "__AVR_ATtiny84__", 0, 0x0060,
"tn84" },
@@ -88,7 +88,7 @@ const struct mcu_type_s avr_mcu_types[]
   { "attiny87", ARCH_AVR25, "__AVR_ATtiny87__", 0, 0x0100,
"tn87" },
   { "attiny48", ARCH_AVR25, "__AVR_ATtiny48__", 0, 0x0100,
"tn48" },
   { "attiny88", ARCH_AVR25, "__AVR_ATtiny88__", 0, 0x0100,
"tn88" },
-  { "at86rf401",ARCH_AVR25, "__AVR_AT86RF401__",1, 0x0060,
"86401" },
+  { "at86rf401",ARCH_AVR25, "__AVR_AT86RF401__",0, 0x0060,
"86401" },
 /* Classic, > 8K, <= 64K.  */
   { "avr3", ARCH_AVR3, NULL,0, 0x0060,
"43355" },
   { "at43usb355",   ARCH_AVR3, "__AVR_AT43USB355__",0, 0x0060,
"43355" },

As it turned out, ATtiny4313 and AT86RF401 have a 16-bit stack pointer and
their manual is bogus in stating their SP has 8 bits only.

This is not a complete fix to the SPH issue because PR51345 is still open:
libgcc might happily read garbage into R29 from IO[0x3e].

Johann


>> What about splitting multilibs?
> 
> Seems that splitting multilibs is a right way.

Opened PR51345 for it.

>> Is this appropriate for 4.7?
> 
> As I understand it, any changes are appropriate for our port at any stage.
> 
>> Johann
>>
>>PR target/51002
>>* config/avr/libgcc.S (__prologue_saves__, __epilogue_restores__):
>>Enclose parts using __SP_H__ in defined (__AVR_HAVE_8BIT_SP__).
>>Add FIXME comments.
>>* config/avr/avr.md (movhi_sp_r_irq_off, movhi_sp_r_irq_on): Set
>>insn condition to !AVR_HAVE_8BIT_SP.
>>* config/avr/avr.c (output_movhi): "clr%B0" instead of "in
>>%B0,__SP_H__" if AVR_HAVE_8BIT_SP.
>>(avr_file_start): Only print "__SP_H__ = 0x3e" if !AVR_HAVE_8BIT_SP.
>>
> 
> Denis.


Re: [PATCH] PRs c++/51239, c++/51180 - Better support for unbound alias template specialization

2011-12-02 Thread Gabriel Dos Reis
On Fri, Dec 2, 2011 at 9:51 AM, Dodji Seketeli  wrote:

> After talking with you offline, we settled on using the existing
> BOUND_TEMPLATE_TEMPLATE_PARM tree to represent this new construct.
> The alias template and its unbound arguments are stored in the
> TYPE_TEMPLATE_INFO of the tree and its TYPE_NAME has the
> TYPE_DECL_ALIAS_P flag set.

This representation sounds much more regular to me, considering
the existing patterns of representing templates.  The "alias" part in
"template alias" just means that we can substitute right away when
the alias is applied.  Otherwise, it is a template (like any other) in
all other aspects (except it can't be deduced.)

-- Gaby


[PATCH] Fix AVX2 mulv32qi expander (PR target/51387)

2011-12-02 Thread Jakub Jelinek
Hi!

As reported by Michael, vect-116.c testcase fails with -mavx2, the
problem is that mulv32qi pattern computes wrong result, the second and
third quarters of the vector are swapped compared to what it should
contain.  This is because we can't use vec_extract_even_odd for V32QI
when we prepared the vpmullw arguments using vpunpck[hl]bw, because
those insns interleave only intra-lanes, therefore we want
to finalize the result using
{ 0,2,..,14,32,34,..,46,16,18,..,30,48,50,..,62 }
permutation instead of the current one
{ 0,2,..,14,16,18,..,30,32,34,..,46,48,50,..,62 }
The new permutation is even shorter (2 vpshufb + vpor) compared to the
extract even (2 vpshufb + vpor + vpermq).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-12-02  Jakub Jelinek  

PR target/51387
* config/i386/sse.md (mul<mode>3 with VI1_AVX2 iterator): For
V32QImode use { 0,2,..,14,32,34,..,46,16,18,..,30,48,50,..,62 }
permutation instead of extract even permutation.

--- gcc/config/i386/sse.md.jj   2011-12-01 11:44:58.0 +0100
+++ gcc/config/i386/sse.md  2011-12-02 12:18:42.657795749 +0100
@@ -5066,7 +5066,24 @@ (define_insn_and_split "mul3"
gen_lowpart (mulmode, t[3];
 
   /* Extract the even bytes and merge them back together.  */
-  ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
+  if (mode == V16QImode)
+ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
+  else
+{
+  /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
+this can't be normal even extraction, but one where additionally
+the second and third quarter are swapped.  That is even one insn
+shorter than even extraction.  */
+  rtvec v = rtvec_alloc (32);
+  for (i = 0; i < 32; ++i)
+   RTVEC_ELT (v, i)
+ = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
+  t[0] = operands[0];
+  t[1] = t[5];
+  t[2] = t[4];
+  t[3] = gen_rtx_CONST_VECTOR (mode, v);
+  ix86_expand_vec_perm_const (t);
+}
 
   set_unique_reg_note (get_last_insn (), REG_EQUAL,
   gen_rtx_MULT (mode, operands[1], operands[2]));

Jakub


[pph] Cleanup to support namespace aliases (2/2) (issue5434110)

2011-12-02 Thread Diego Novillo

The second part of the cleanup prepares support for writing merge keys
and merge bodies for structures other than ASTs.  The problem we are
having now is that when merging the bindings of namespace, the reader
creates an empty binding level, but after the merging is done, we go
on to create another (different) binding level for the same namespace,
which then confuses the reader.

This patch is large but mechanical.  It adds support for merge key and
merge body records on arbitrary data structures.  The actual fix to
namespace aliases comes in the next patch.

Tested on x86_64.


Diego.

* pph-in.c (pph_in_cxx_binding_1): Assert that we are not
reading a merge record.
(pph_in_class_binding): Likewise.
(pph_in_label_binding): Likewise.
(pph_in_language_function): Likewise.
(pph_in_struct_function): Likewise.
(pph_in_sorted_fields_type): Likewise.
(pph_in_lang_type_class): Likewise.
(pph_in_lang_type): Likewise.
(pph_in_merge_key_binding_level): Rename from pph_in_binding_merge_keys.
(pph_in_merge_body_binding_level_1): Rename from
pph_in_binding_merge_bodies_1.  Update all users.
(pph_in_merge_body_binding_level): Rename from
pph_in_binding_merge_bodies.  Update all users.
(pph_in_ref_lang_decl): Rename from pph_in_ref_lang_specific.
Update all users.
Assert that we are not reading a merge record.
(pph_in_lang_decl): Rename from pph_in_lang_specific.  Update all users.
(pph_in_merge_lang_decl): Rename from pph_in_merge_lang_specific.
* pph-out.c (pph_out_reference_record): Support
PPH_RECORD_START_MERGE_BODY records.
Return the written marker instead of a boolean value.
Update all users.
(pph_out_start_merge_key_record): Accept a void * instead of
tree.  Add pph_tag argument.  Update all users.
(pph_out_start_merge_key_tree_record): New.
(pph_out_merge_key_binding_level): Rename from
pph_out_binding_merge_keys.
(pph_out_merge_body_binding_level): Rename from
pph_out_binding_merge_bodies.
(pph_out_cxx_binding_1): Assert that we are not writing a
merge record.
(pph_out_cxx_binding): Likewise.
(pph_out_class_binding): Likewise.
(pph_out_label_binding): Likewise.
(pph_out_language_function): Likewise.
(pph_out_struct_function): Likewise.
(pph_out_ld_parm): Likewise.
(pph_out_lang_decl): Likewise.
(pph_out_sorted_fields_type): Likewise.
(pph_out_lang_type_class): Likewise.
(pph_out_lang_type): Likewise.
(pph_out_cgraph_node): Likewise.
(pph_out_tree): Call pph_is_reference_or_end_marker.
* pph-streamer.h (pph_is_reference_or_end_marker): New.

diff --git a/gcc/cp/pph-in.c b/gcc/cp/pph-in.c
index 088c1a2..ce6aeae 100644
--- a/gcc/cp/pph-in.c
+++ b/gcc/cp/pph-in.c
@@ -998,6 +998,9 @@ pph_in_cxx_binding_1 (pph_stream *stream)
 return (cxx_binding *) pph_cache_find (stream, marker, image_ix, ix,
   PPH_cxx_binding);
 
+  /* Remove if we start emitting merge keys for this structure.  */
+  gcc_assert (marker == PPH_RECORD_START);
+
   value = pph_in_tree (stream);
   type = pph_in_tree (stream);
   ALLOC_AND_REGISTER (&stream->cache, ix, PPH_cxx_binding, cb,
@@ -1048,6 +1051,9 @@ pph_in_class_binding (pph_stream *stream)
 return (cp_class_binding *) pph_cache_find (stream, marker, image_ix, ix,
PPH_cp_class_binding);
 
+  /* Remove if we start emitting merge keys for this structure.  */
+  gcc_assert (marker == PPH_RECORD_START);
+
   ALLOC_AND_REGISTER (&stream->cache, ix, PPH_cp_class_binding, cb,
   ggc_alloc_cleared_cp_class_binding ());
   cb->base = pph_in_cxx_binding (stream);
@@ -1073,6 +1079,9 @@ pph_in_label_binding (pph_stream *stream)
 return (cp_label_binding *) pph_cache_find (stream, marker, image_ix, ix,
PPH_cp_label_binding);
 
+  /* Remove if we start emitting merge keys for this structure.  */
+  gcc_assert (marker == PPH_RECORD_START);
+
   ALLOC_AND_REGISTER (&stream->cache, ix, PPH_cp_label_binding, lb,
   ggc_alloc_cleared_cp_label_binding ());
   lb->label = pph_in_tree (stream);
@@ -1178,7 +1187,7 @@ pph_in_binding_level (pph_stream *stream)
 /* Read all the merge keys from STREAM into the cp_binding_level BL.  */
 
 static void
-pph_in_binding_merge_keys (pph_stream *stream, cp_binding_level *bl)
+pph_in_merge_key_binding_level (pph_stream *stream, cp_binding_level *bl)
 {
   /* Read all the merge keys and merge into the bindings.  */
   pph_in_merge_key_chain (stream, &bl->names);
@@ -1191,7 +1200,7 @@ pph_in_binding_merge_keys (pph_stream *stream, 
cp_binding_level *bl)
 /* Read all the merge bodies from STREAM into the cp_binding_level BL.  */
 
 static

Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread William J. Schmidt


On Fri, 2011-12-02 at 11:28 -0700, Jeff Law wrote:
> On 12/02/11 11:10, William J. Schmidt wrote:
> > 
> >> 
> >> I see a mention of creating equivalences for redundant PHIs?  Are
> >> we just trying to determine that two PHIs are going to result in
> >> the same value?
> > 
> > Jeff, see comment #37 in 
> > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39976.  The issue is
> > that we have two PHIs in the same block as follows:
> > 
> > # prephitmp.35_322 = PHI  
> > # prephitmp.35_225 = PHI 
> OK.  Make sure you hash the phi argument & its edge...  Similarly the
> equality comparison needs to check args & edges.  Or s/edge/edge->src/
> if that's easier.
> 

OK, this is what I was missing.  Including edge->src->index in the hash
and equality test to get the actual origin of the arguments should allow
the PHIs to match when they're truly equivalent.  Thanks!

> > 
> > The coalescing algorithm in tree-outof-ssa doesn't handle this well
> > and ends up splitting the back arc of a simple loop as a result.
> > I'm creating an equivalence between prephitmp.35_322 and
> > prephitmp.35_225 in this case so that one PHI goes dead and is
> > removed.
> Right.  I don't think I bothered with this because I didn't see it
> happening and the comparison of the PHIs with each other gets
> potentially expensive.
>
>  As pointed out in
> > the comments, this needs to be restricted to occur only in the
> > same block since the PHIs aren't necessarily equivalent otherwise.
> I must be missing something -- why is the equivalence only valid in
> the same block?
> 
> Conceptually this situation isn't any different replacing the second
> PHI with a copy
> blah_225 = blah_322
> 
> And the equivalence is valid for the entire dominator subtree.

Yes, once the origin of the PHI arguments is correctly taken into
account, then equivalent PHIs can be eliminated within the subtree.

> 
> If you really need to create a temporary equiv, pushing the marker,
> create the equiv and pop the marker when equiv dies is simple and easy.
>
> Jeff

Much obliged,
Bill



Re: [google] Patch to enable efficient function level instrumentation (issue 5416043)

2011-12-02 Thread davidxl

Have you uploaded the revised patch?

David


http://codereview.appspot.com/5416043/diff/1/gcc/config/i386/i386.c
File gcc/config/i386/i386.c (right):

http://codereview.appspot.com/5416043/diff/1/gcc/config/i386/i386.c#newcode10927
gcc/config/i386/i386.c:10927: + is later renamed to ''
by ix86_elf_asm_named_section().  */
On 2011/12/02 01:57:17, harshit wrote:

On 2011/11/28 22:16:27, davidxl wrote:
> Explain more on the comdat handling.



I have limited knowledge about comdat sections, so can't give a

detailed

explanation on why the assembler emits an error.


What does the assembler error look like? An example file would be
helpful.

http://codereview.appspot.com/5416043/diff/1/gcc/config/i386/i386.c#newcode10970
gcc/config/i386/i386.c:10970: +
sizeof("_function_patch_prologue") - 1;
Define a macro for the prologue and epilogue name.

http://codereview.appspot.com/5416043/diff/1/gcc/config/i386/i386.c#newcode10974
gcc/config/i386/i386.c:10974: +
section_name_length) == 0)
The two section name lengths happen to be the same, but it is not good to
share the same value here. sizeof (..) will be evaluated at compile
time.

http://codereview.appspot.com/5416043/diff/1/gcc/config/i386/i386.opt
File gcc/config/i386/i386.opt (right):

http://codereview.appspot.com/5416043/diff/1/gcc/config/i386/i386.opt#newcode570
gcc/config/i386/i386.opt:570: Minimum number of instructions in the
function without loop before the function is qualified for patching for
instrumentation (for use with -mpatch-functions-for-instrumentation)
in params.def

On 2011/12/02 01:57:17, harshit wrote:

On 2011/11/28 22:16:27, davidxl wrote:
> It may be better to define PARAM for it.



Param for this option? How to do it in options file? I looked at the

docs, but

didn't find anything on it.


http://codereview.appspot.com/5416043/


[pph] Cleanup to support namespace aliases (1/2) (issue5434109)

2011-12-02 Thread Diego Novillo

This patch is part 1 of a cleanup needed to support namespace aliases.
When processing a namespace alias, we cannot access its binding level
because it is NULL.  Besides, it does not make sense to do anything
with it, since the binding level for the original namespace would've
been processed before.

The creation of binding level on the reader is tripping us up because
we end up with two binding levels for the same namespace.  This is
partially addressed in the next patch.

Tested on x86_64.


Diego.

* pph-in.c (pph_in_bool): New.
(pph_in_merge_key_namespace_decl): Factor out of ...
(pph_in_merge_key_tree): ... here.
Handle namespace aliases. 
* pph-out.c (pph_out_bool): New.
(pph_out_merge_key_namespace_decl): Factor out of ...
(pph_out_merge_key_tree): ... here.
Handle namespace aliases.

diff --git a/gcc/cp/pph-in.c b/gcc/cp/pph-in.c
index 73c93df..088c1a2 100644
--- a/gcc/cp/pph-in.c
+++ b/gcc/cp/pph-in.c
@@ -210,6 +210,17 @@ pph_in_bitpack (pph_stream *stream)
 }
 
 
+/* Read a boolean value from STREAM.  */
+
+static inline bool
+pph_in_bool (pph_stream *stream)
+{
+  unsigned val = pph_in_uint (stream);
+  gcc_assert (val <= 1);
+  return (bool) val;
+}
+
+
 / source information */
 
 
@@ -1575,7 +1586,7 @@ pph_in_lang_specific (pph_stream *stream, tree decl)
 }
 
 
-/* Read language specific data in DECL from STREAM.  */
+/* Read and merge language specific data in DECL from STREAM.  */
 
 static void
 pph_in_merge_lang_specific (pph_stream *stream, tree decl)
@@ -2223,6 +2234,40 @@ pph_in_tree_header (pph_stream *stream, enum LTO_tags 
tag)
 }
 
 
+/* Read all the merge keys for the names under namespace DECL from
+   STREAM.  */
+
+static void
+pph_in_merge_key_namespace_decl (pph_stream *stream, tree decl)
+{
+  bool is_namespace_alias;
+
+  /* If EXPR is a namespace alias, we do not need to merge
+ its binding level (namespaces aliases do not have a
+ binding level, they use the one from the namespace they
+ alias).  */
+  is_namespace_alias = pph_in_bool (stream);
+  if (!is_namespace_alias)
+{
+  cp_binding_level *bl;
+
+  if (DECL_LANG_SPECIFIC (decl))
+   /* Merging into an existing namespace.  */
+   bl = NAMESPACE_LEVEL (decl);
+  else
+   {
+ /* This is a new namespace.  Allocate a lang_decl and a binding
+level to DECL.  */
+ retrofit_lang_decl (decl);
+ bl = ggc_alloc_cleared_cp_binding_level ();
+ NAMESPACE_LEVEL (decl) = bl;
+   }
+
+  pph_in_binding_merge_keys (stream, bl);
+}
+}
+
+
 /* Read a merge key from STREAM.  If the merge key read from STREAM
is not found in *CHAIN, the newly allocated tree is added to it.  */
 
@@ -2268,20 +2313,7 @@ pph_in_merge_key_tree (pph_stream *stream, tree *chain)
   if (DECL_P (expr))
 {
   if (TREE_CODE (expr) == NAMESPACE_DECL)
-{
- cp_binding_level *bl;
- if (DECL_LANG_SPECIFIC (expr))
-   /* Merging into an existing namespace.  */
-   bl = NAMESPACE_LEVEL (expr);
- else
-   {
- /* This is a new namespace.  */
- retrofit_lang_decl (expr);
- bl = ggc_alloc_cleared_cp_binding_level ();
- NAMESPACE_LEVEL (expr) = bl;
-   }
- pph_in_binding_merge_keys (stream, bl);
-}
+   pph_in_merge_key_namespace_decl (stream, expr);
 #if 0
 /* FIXME pph: Disable type merging for the moment.  */
   else if (TREE_CODE (expr) == TYPE_DECL)
diff --git a/gcc/cp/pph-out.c b/gcc/cp/pph-out.c
index a4035f1..e1e21b9 100644
--- a/gcc/cp/pph-out.c
+++ b/gcc/cp/pph-out.c
@@ -168,6 +168,15 @@ pph_out_bitpack (pph_stream *stream, struct bitpack_d *bp)
 }
 
 
+/* Write a boolean value VAL to STREAM.  */
+
+static inline void
+pph_out_bool (pph_stream *stream, bool val)
+{
+  pph_out_uint (stream, val ? 1 : 0);
+}
+
+
 / source information */
 
 
@@ -2123,6 +2132,24 @@ pph_out_merge_name (pph_stream *stream, tree expr)
 }
 
 
+/* Write merge information for a namespace DECL to STREAM.  */
+
+static void
+pph_out_merge_key_namespace_decl (pph_stream *stream, tree decl)
+{
+  bool is_namespace_alias;
+
+  gcc_assert (TREE_CODE (decl) == NAMESPACE_DECL);
+
+  /* If EXPR is a namespace alias, it will not have an associated
+ binding.  In that case, tell the reader not to bother with it.  */
+  is_namespace_alias = (DECL_NAMESPACE_ALIAS (decl) != NULL_TREE);
+  pph_out_bool (stream, is_namespace_alias);
+  if (!is_namespace_alias)
+pph_out_binding_merge_keys (stream, NAMESPACE_LEVEL (decl));
+}
+
+
 /* Write the merge key for tree EXPR to STREAM.  */
 
 static void
@@ -2144,7 +2171,7 @@ pph_out_merge_key_tree (pph_stream *stream, tree expr)
   if (DECL_P (expr))
 {
   if (TREE_CODE (expr) == NAMESPACE_DECL)
-pph_out_binding_merge_keys (stream, 

[C++ Patch] PR 51313

2011-12-02 Thread Paolo Carlini

Hi,

here we ICE in C++11 mode in null_ptr_cst_p because integer_zerop is 
true for a NOP_EXPR, which TREE_OVERFLOW cannot handle. Thus the idea is 
using STRIP_NOPS in C++11 mode too: makes sense to me unless we have 
reasons of principle to exclude NOPs in that mode. Anyway, patch passes 
testing on x86_64-linux.


Thanks,
Paolo.

///
/cp
2011-12-02  Paolo Carlini  

PR c++/51313
* call.c (null_ptr_cst_p): STRIP_NOPS in c++11 mode too.

/testsuite
2011-12-02  Paolo Carlini  

PR c++/51313
* g++.dg/cpp0x/pr51313.C: New.

Index: testsuite/g++.dg/cpp0x/pr51313.C
===
--- testsuite/g++.dg/cpp0x/pr51313.C(revision 0)
+++ testsuite/g++.dg/cpp0x/pr51313.C(revision 0)
@@ -0,0 +1,18 @@
+// PR c++/51313
+// { dg-options "-std=c++0x" }
+
+class ostream;
+
+extern "C" {
+  extern int isdigit (int);
+}
+
+ostream&
+operator<<(ostream&, const unsigned char*);
+
+extern ostream cout;
+
+int main()
+{
+  cout << isdigit(0);
+}
Index: cp/call.c
===
--- cp/call.c   (revision 181932)
+++ cp/call.c   (working copy)
@@ -549,10 +549,8 @@ null_ptr_cst_p (tree t)
 {
   /* Core issue 903 says only literal 0 is a null pointer constant.  */
   if (cxx_dialect < cxx0x)
-   {
- t = integral_constant_value (t);
- STRIP_NOPS (t);
-   }
+   t = integral_constant_value (t);
+  STRIP_NOPS (t);
   if (integer_zerop (t) && !TREE_OVERFLOW (t))
return true;
 }


Re: [Patch] Increase array sizes in vect-tests to enable 256-bit vectorization

2011-12-02 Thread Ira Rosen


Michael Zolotukhin  wrote on 02/12/2011
08:11:41 PM:
>
> > Please don't change initial values to 0, we want to check that
everything
> > works fine for non-zeros as well.
> > There are several other occasions in the patch.
>
> > Please check the updated patch (attached).

This is ok with me.

Thanks,
Ira

>
> On 2 December 2011 20:49, Ira Rosen  wrote:
> >
> > gcc-patches-ow...@gcc.gnu.org wrote on 02/12/2011 06:23:25 PM:
> >
> >> Hi,
> >>
> >> This patch increases array sizes in tests from vect.exp suite, thus
> >> enabling 256-bit vectorization where it's available.
> >>
> >> Ok for trunk?
> >
> > --- a/gcc/testsuite/gcc.dg/vect/slp-24.c
> > +++ b/gcc/testsuite/gcc.dg/vect/slp-24.c
> > ...
> > @@ -13,14 +12,17 @@ typedef struct {
> >    unsigned char d;
> >  } s;
> >
> > -unsigned char ub[N*2] =
> > {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,1,3,6,9,12,15,18,21,
> 24,27,30,33,36,39,42,45};
> > -unsigned char uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
> > +unsigned char ub[N*2];
> > +unsigned char uc[N];
> > +
> > +volatile int y = 0;
> > +unsigned char check_diff = 0;
> >
> >  void
> >  main1 (unsigned char x, unsigned char max_result, unsigned char
> > min_result, s *arr)
> >  {
> >   int i;
> > -  unsigned char udiff = 2;
> > +  unsigned char udiff = 0;
> >
> > Please don't change initial values to 0, we want to check that
everything
> > works fine for non-zeros as well.
> > There are several other occasions in the patch.
> >
> > Thanks,
> > Ira
> >
> >>
> >> Changelog:
> >> 2011-12-02  Michael Zolotukhin  
> >>
> >>    * gcc.dg/vect/slp-13.c: Increase array size, add initialization.
> >>    * gcc.dg/vect/slp-24.c: Ditto.
> >>    * gcc.dg/vect/slp-3.c: Likewise and fix scans.
> >>    * gcc.dg/vect/slp-34.c: Ditto.
> >>    * gcc.dg/vect/slp-4.c: Ditto.
> >>    * gcc.dg/vect/slp-cond-2.c: Ditto.
> >>    * gcc.dg/vect/slp-multitypes-11.c: Ditto.
> >>    * gcc.dg/vect/vect-1.c: Ditto.
> >>    * gcc.dg/vect/vect-10.c: Ditto.
> >>    * gcc.dg/vect/vect-105.c: Ditto.
> >>    * gcc.dg/vect/vect-112.c: Ditto.
> >>    * gcc.dg/vect/vect-15.c: Ditto.
> >>    * gcc.dg/vect/vect-2.c: Ditto.
> >>    * gcc.dg/vect/vect-31.c: Ditto.
> >>    * gcc.dg/vect/vect-32.c: Ditto.
> >>    * gcc.dg/vect/vect-33.c: Ditto.
> >>    * gcc.dg/vect/vect-34.c: Ditto.
> >>    * gcc.dg/vect/vect-35.c: Ditto.
> >>    * gcc.dg/vect/vect-36.c: Ditto.
> >>    * gcc.dg/vect/vect-6.c: Ditto.
> >>    * gcc.dg/vect/vect-73.c: Ditto.
> >>    * gcc.dg/vect/vect-74.c: Ditto.
> >>    * gcc.dg/vect/vect-75.c: Ditto.
> >>    * gcc.dg/vect/vect-76.c: Ditto.
> >>    * gcc.dg/vect/vect-80.c: Ditto.
> >>    * gcc.dg/vect/vect-85.c: Ditto.
> >>    * gcc.dg/vect/vect-89.c: Ditto.
> >>    * gcc.dg/vect/vect-97.c: Ditto.
> >>    * gcc.dg/vect/vect-98.c: Ditto.
> >>    * gcc.dg/vect/vect-all.c: Ditto.
> >>    * gcc.dg/vect/vect-double-reduc-6.c: Ditto.
> >>    * gcc.dg/vect/vect-iv-8.c: Ditto.
> >>    * gcc.dg/vect/vect-iv-8a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-1.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-1a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-1b.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-2.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-2a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-2c.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-3.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-3a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4b.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4c.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4d.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4m.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-fir-lb.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-fir.c: Ditto.
> >>    * gcc.dg/vect/vect-over-widen-1.c: Ditto.
> >>    * gcc.dg/vect/vect-over-widen-2.c: Ditto.
> >>    * gcc.dg/vect/vect-over-widen-3.c: Ditto.
> >>    * gcc.dg/vect/vect-over-widen-4.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-1char.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-2char.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-pattern-1b.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-pattern-1c.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-pattern-2b.c: Ditto.
> >>    * gcc.dg/vect/vect-shift-2.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-u8-i8-gap2.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-u8-i8-gap4.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-u8-i8-gap7.c: Ditto.
> >>
> >> --
> >> ---
> >> Best regards,
> >> Michael V. Zolotukhin,
> >> Software Engineer
> >> Intel Corporation.
> >> [attachment "vect_tests.patch" deleted by Ira Rosen/Haifa/IBM]
> >
>
>
>
> --
> ---
> Best regards,
> Michael V. Zolotukhin,
> Software Engineer
> Intel Corporation.
> [attachment "vect_tests-2.patch" deleted by Ira Rosen/Haifa/IBM]



[testsuite,committed] Fix gcc.c-torture/execute/vector-subscript-1.c

2011-12-02 Thread Georg-Johann Lay
http://gcc.gnu.org/viewcvs?view=revision&revision=181933

Committed the following, obvious fix to a test case that assumed
 sizeof(int) = 4:


Johann


--- trunk/gcc/testsuite/gcc.c-torture/execute/vector-subscript-1.c  
2011/12/02
17:44:28181932
+++ trunk/gcc/testsuite/gcc.c-torture/execute/vector-subscript-1.c  
2011/12/02
18:45:07181933
@@ -49,7 +49,7 @@
   if (*f(&val3, 3 ) != 4)
 __builtin_abort ();

-  __builtin_memcpy(a, &val3, 16);
+  __builtin_memcpy (a, &val3, sizeof(a));
   for(i = 0; i < 4; i++)
 if (a[i] != i+1)
   __builtin_abort ();



Re: [Patch,AVR] Was/Fix: error linking lto1 for target avr

2011-12-02 Thread Georg-Johann Lay
Denis Chertykov wrote:
> 2011/11/29 Georg-Johann Lay :
>> Ian Lance Taylor wrote:
>>> Georg-Johann Lay  writes:
>>>
 So if a frontend can define address spaces and it is a generic feature, the
 question is how to get the name of an address space in a generic, language
 independent way.
>>> We could decide that all frontends that use address spaces must define a
>>> printable name for each address space.  That would mean changing the
>>> middle-end address space interface to give a name to each address space.
>>> The current middle-end address space interface does not require that
>>> address spaces have a name.  I was not involved in the addition of
>>> address spaces to gcc, and I don't know why they followed the path they
>>> did.
>>>
>>> Ian
>> Presumably they chose that approach to keep it simple or it is even a
>> performance issue to move the name around.
>>
>> I attached a patch but I fail to find the right configure options for
>> gcc/binutils as the testsuite complains
>>
>> ./avr/bin/ld: bad -plugin option
>>
>> Configured gcc with --enable-lto and binutils 2.21 with --enable-plugin.
>>
>> Maybe the patch can be pre-approved so that the others can proceed with 
>> their work?
> 
> Better to complete this work.
> 
> Denis.

http://gcc.gnu.org/ml/gcc-patches/2011-11/msg02574.html

I now switched back to --disable-lto as I could not resolve the problems that
appear to be a collect2 issue, see

http://gcc.gnu.org/ml/gcc-help/2011-12/msg00016.html

What I can do is:

* build the compiler with the patch and with LTO enabled and without
  getting a linker error for c_addr_space_name.

* I cannot get usable results from testsuite because of collect2 breakage

* Testsuite passes fine with the patch and --disable-lto the only
  regression that I get is
gcc.c-torture/execute/vector-subscript-1.c
  which is because of bogus testcase that assumes sizeof(int) = 4
  and passes with the fix

Index: gcc/testsuite/gcc.c-torture/execute/vector-subscript-1.c
===
--- gcc/testsuite/gcc.c-torture/execute/vector-subscript-1.c  (revision 181932)
+++ gcc/testsuite/gcc.c-torture/execute/vector-subscript-1.c  (working copy)
@@ -49,7 +49,7 @@ int main( int argc, char* argv[] )
   if (*f(&val3, 3 ) != 4)
 __builtin_abort ();

-  __builtin_memcpy(a, &val3, 16);
+  __builtin_memcpy(a, &val3, sizeof(a));
   for(i = 0; i < 4; i++)
 if (a[i] != i+1)
   __builtin_abort ();


or


-  __builtin_memcpy(a, &val3, 16);
+  __builtin_memcpy(a, &val3, 4*sizeof(int));



Ok for trunk?

Johann

* config/avr/avr.h (ADDR_SPACE_PGM, ADDR_SPACE_PGM1,
ADDR_SPACE_PGM2, ADDR_SPACE_PGM3, ADDR_SPACE_PGM4,
ADDR_SPACE_PGM5, ADDR_SPACE_PGMX): Write as enum.
(avr_addrspace_t): New typedef.
(avr_addrspace): New declaration.
* config/avr/avr-c.c (avr_toupper): New static function.
(avr_register_target_pragmas, avr_cpu_cpp_builtins): Use
avr_addrspace to get address space information.
* config/avr/avr.c (avr_addrspace): New variable.
(avr_out_lpm, avr_pgm_check_var_decl, avr_insert_attributes,
avr_asm_named_section, avr_section_type_flags,
avr_asm_select_section, avr_addr_space_address_mode,
avr_addr_space_convert, avr_emit_movmemhi): Use it.
(avr_addr_space_pointer_mode): Forward to avr_addr_space_address_mode.
(avr_pgm_segment): Remove.





Re: [PATCH] Implement stap probe on ARM's unwinder

2011-12-02 Thread Sergio Durigan Junior
Ramana Radhakrishnan  writes:

> Sergio: Other than a few minor tweaks to the Changelog it largely
> looks obvious to me.

Hello Ramana,

Thanks for the review.  Here is the updated version of the patch.

I asked Tom Tromey to commit it for me, since I don't have write
permission on the repository.

Thank you again,

Sergio.


diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index e7f18e2..0901cae 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,14 @@
+2011-12-02  Sergio Durigan Junior  
+
+   * unwind-arm-common.inc: Include `tconfig.h', `tsystem.h' and
+   `sys/sdt.h'.
+   (_Unwind_DebugHook): New function.
+   (uw_restore_core_regs): New define.
+   (unwind_phase2): Use uw_restore_core_regs instead of
+   restore_core_regs.
+   (unwind_phase2_forced): Likewise.
+   (__gnu_Unwind_Resume): Likewise.
+
 2011-11-30  John David Anglin  
 
PR other/51272
diff --git a/libgcc/unwind-arm-common.inc b/libgcc/unwind-arm-common.inc
index 0713056..bf16902 100644
--- a/libgcc/unwind-arm-common.inc
+++ b/libgcc/unwind-arm-common.inc
@@ -21,8 +21,15 @@
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
 
+#include "tconfig.h"
+#include "tsystem.h"
 #include "unwind.h"
 
+/* Used for SystemTap unwinder probe.  */
+#ifdef HAVE_SYS_SDT_H
+#include <sys/sdt.h>
+#endif
+
 /* We add a prototype for abort here to avoid creating a dependency on
target headers.  */
 extern void abort (void);
@@ -105,6 +112,44 @@ static inline _uw selfrel_offset31 (const _uw *p);
 
 static _uw __gnu_unwind_get_pr_addr (int idx);
 
+static void _Unwind_DebugHook (void *, void *)
+  __attribute__ ((__noinline__, __used__, __noclone__));
+
+/* This function is called during unwinding.  It is intended as a hook
+   for a debugger to intercept exceptions.  CFA is the CFA of the
+   target frame.  HANDLER is the PC to which control will be
+   transferred.  */
+
+static void
+_Unwind_DebugHook (void *cfa __attribute__ ((__unused__)),
+  void *handler __attribute__ ((__unused__)))
+{
+  /* We only want to use stap probes starting with v3.  Earlier
+ versions added too much startup cost.  */
+#if defined (HAVE_SYS_SDT_H) && defined (STAP_PROBE2) && _SDT_NOTE_TYPE >= 3
+  STAP_PROBE2 (libgcc, unwind, cfa, handler);
+#else
+  asm ("");
+#endif
+}
+
+/* This is a wrapper to be called when we need to restore core registers.
+   It will call `_Unwind_DebugHook' before restoring the registers, thus
+   making it possible to intercept and debug exceptions.
+
+   When calling `_Unwind_DebugHook', the first argument (the CFA) is zero
+   because we are not interested in it.  However, it must be there (even
+   being zero) because GDB expects to find it when using the probe.  */
+
+#define uw_restore_core_regs(TARGET, CORE)   \
+  do \
+{\
+  void *handler = __builtin_frob_return_addr ((void *) VRS_PC (TARGET));  \
+  _Unwind_DebugHook (0, handler);\
+  restore_core_regs (CORE);
  \
+}\
+  while (0)
+
 /* Perform a binary search for RETURN_ADDRESS in TABLE.  The table contains
NREC entries.  */
 
@@ -253,8 +298,8 @@ unwind_phase2 (_Unwind_Control_Block * ucbp, phase2_vrs * 
vrs)
   
   if (pr_result != _URC_INSTALL_CONTEXT)
 abort();
-  
-  restore_core_regs (&vrs->core);
+
+  uw_restore_core_regs (vrs, &vrs->core);
 }
 
 /* Perform phase2 forced unwinding.  */
@@ -339,7 +384,7 @@ unwind_phase2_forced (_Unwind_Control_Block *ucbp, 
phase2_vrs *entry_vrs,
   return _URC_FAILURE;
 }
 
-  restore_core_regs (&saved_vrs.core);
+  uw_restore_core_regs (&saved_vrs, &saved_vrs.core);
 }
 
 /* This is a very limited implementation of _Unwind_GetCFA.  It returns
@@ -450,7 +495,7 @@ __gnu_Unwind_Resume (_Unwind_Control_Block * ucbp, 
phase2_vrs * entry_vrs)
 {
 case _URC_INSTALL_CONTEXT:
   /* Upload the registers to enter the landing pad.  */
-  restore_core_regs (&entry_vrs->core);
+  uw_restore_core_regs (entry_vrs, &entry_vrs->core);
 
 case _URC_CONTINUE_UNWIND:
   /* Continue unwinding the next frame.  */


Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread Jeff Law
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 12/02/11 11:10, William J. Schmidt wrote:
> 
>> 
>> I see a mention of creating equivalences for redundant PHIs?  Are
>> we just trying to determine that two PHIs are going to result in
>> the same value?
> 
> Jeff, see comment #37 in 
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39976.  The issue is
> that we have two PHIs in the same block as follows:
> 
> # prephitmp.35_322 = PHI  
> # prephitmp.35_225 = PHI 
OK.  Make sure you hash the phi argument & its edge...  Similarly the
equality comparison needs to check args & edges.  Or s/edge/edge->src/
if that's easier.

> 
> The coalescing algorithm in tree-outof-ssa doesn't handle this well
> and ends up splitting the back arc of a simple loop as a result.
> I'm creating an equivalence between prephitmp.35_322 and
> prephitmp.35_225 in this case so that one PHI goes dead and is
> removed.
Right.  I don't think I bothered with this because I didn't see it
happening and the comparison of the PHIs with each other gets
potentially expensive.


 As pointed out in
> the comments, this needs to be restricted to occur only in the
> same block since the PHIs aren't necessarily equivalent otherwise.
I must be missing something -- why is the equivalence only valid in
the same block?

Conceptually this situation isn't any different replacing the second
PHI with a copy
blah_225 = blah_322

And the equivalence is valid for the entire dominator subtree.

If you really need to create a temporary equiv, pushing the marker,
create the equiv and pop the marker when equiv dies is simple and easy.

Jeff
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJO2RjmAAoJEBRtltQi2kC7I7YH/RTVZF6jzBpyg+68kMTZuHDH
hETg03jnqhSXhWOzBoALZwtR9iCJ/UehCCcnjQ3eV7o1AqsImLMnap+VrS6lYfI3
hYQXtdb2F770H70yY/fgEX/VDAdGlyduXfMqHl4mLw7Apz3mpvyBucqHqECHJ+xj
AtZf3duGXp3Fpvhm3PZ7wQa+Pl0qnJU3VxqU6xkGWn9A8et8U1IdYs/wnHPR/HPY
KP7oY3JWxwR6kpzNkFJM1OZ+Nn9XWGgScAp1uBXJhK2RLgIuxaLiM5wl6VAJR1Fs
EuMBnQbHELA1ugtqC26UsDCTkjDpLGgs02ID7ArySsVgmIYvXCVzEi0cdJA42V4=
=NKrB
-END PGP SIGNATURE-


Re: [PATCH] Implement stap probe on ARM's unwinder

2011-12-02 Thread Sergio Durigan Junior
Bernd Schmidt  writes:

> On 12/01/11 13:01, Ramana Radhakrishnan wrote:
>> Sergio: Other than a few minor tweaks to the Changelog it largely
>> looks obvious to me.
>> 
>> Bernd, could you take another look at this since this is now shared
>> with the c6x backend ?
>
> Doesn't look like it would cause problems. I have no idea what
> builtin_frob_return_addr does but it appears to exist everywhere.

Thanks for the reviews.  I guess I'll leave the call to
builtin_frob_return_addr there.  So, after addressing Ramana's
suggestions to ChangeLog, is this patch OK to go in?

Thanks.


Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread William J. Schmidt


On Fri, 2011-12-02 at 10:52 -0700, Jeff Law wrote:
> On 12/02/11 10:40, Michael Matz wrote:
> > Hi,
> > 
> > On Fri, 2 Dec 2011, William J. Schmidt wrote:
> > 
> >> It seems like a fair amount of rip-up to avoid keeping the PHI
> >> state around between blocks, so I just check to ensure the PHI
> >> definitions occur in the same block before recording their
> >> equivalence.
> > 
> > Then you should at least mix the BB number into the hash value (and
> >  possibly also check it already in hashable_expr_equal_p) in order
> > > to reduce number of collisions.
> > 
> > But I wonder why it's not enough to just do a push/pop sequence on
> >  avail_exprs_stack around your new PHI processing in
> > dom_opt_enter_block, ala
> > 
> > +  VEC_safe_push (expr_hash_elt_t, heap, avail_exprs_stack, NULL); 
> > /* Create equivalences from redundant PHIs.  */ for (gsi =
> > gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 
> > eliminate_redundant_computations (&gsi); +
> > remove_local_expressions_from_table ();
> > 
> > on top of your current version.  That ought to remove the added PHI
> >  expressions (and only them) from the hash table but retain the
> > information of equality in the const_or_copies_stack.  Checking the
> > BB wouldn't be required then.
> Sorry, I haven't been following this thread and there isn't much
> discussion about what problem we're trying to solve using DOM within
> the PR.
> 
> I see a mention of creating equivalences for redundant PHIs?  Are we
> just trying to determine that two PHIs are going to result in the same
> value?

Jeff, see comment #37 in
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39976.  The issue is that we
have two PHIs in the same block as follows:

  # prephitmp.35_322 = PHI 
  # prephitmp.35_225 = PHI 

The coalescing algorithm in tree-outof-ssa doesn't handle this well and
ends up splitting the back arc of a simple loop as a result.  I'm
creating an equivalence between prephitmp.35_322 and prephitmp.35_225 in
this case so that one PHI goes dead and is removed.  As pointed out in
the comments, this needs to be restricted to occur only in the same
block since the PHIs aren't necessarily equivalent otherwise.

Michael, you're quite right, I think using the extra marker and stack
pop should work quite well and would be more elegant and efficient.
I'll try it out.  I was initially thinking the PHI copies and existing
copies would be intermingled, but that's not the case.

Thanks,
Bill

> 
> jeff



Re: rs6000 options change for rtems.h

2011-12-02 Thread Joel Sherrill

On 12/02/2011 11:57 AM, Joseph S. Myers wrote:

On Fri, 2 Dec 2011, Joel Sherrill wrote:


On 12/02/2011 10:38 AM, Joseph S. Myers wrote:

On Fri, 2 Dec 2011, Joel Sherrill wrote:


2011-12-02  Joel Sherrill

  * config/rs6000/rtems.h: Switch to using global_options_set
  in SUBSUBTARGET_OVERRIDE_OPTIONS.

Is it deliberate that you are removing the first part of each "if"
condition (thus, no longer checking TARGET_HARD_FLOAT before setting
rs6000_float_gprs, no longer checking rs6000_float_gprs before setting
rs6000_spe, etc.)?


I patterned this after what was in other files.
It is done this way everywhere it is referenced.

Should all of them be changed?

Not necessarily.

I described how I think this sort of logic should work in
.  I think that means
something closer to the other headers than to rtems.h - but I don't think
a semantic change should be mixed with a change that's just supposed to
get things to build again.


OK.  I obviously read too much into the other uses.
I did not intend to change semantics just account for
the change making this not compile.

How does the new version look?

--
Joel Sherrill, Ph.D. Director of Research&  Development
joel.sherr...@oarcorp.comOn-Line Applications Research
Ask me about RTEMS: a free RTOS  Huntsville AL 35805
   Support Available (256) 722-9985


Index: rtems.h
===
--- rtems.h (revision 181924)
+++ rtems.h (working copy)
@@ -57,15 +57,15 @@
   { "cpp_os_rtems",CPP_OS_RTEMS_SPEC }
 
 #undef SUBSUBTARGET_OVERRIDE_OPTIONS
-#define SUBSUBTARGET_OVERRIDE_OPTIONS  \
-  do { \
-if (TARGET_E500)   \
-  {
\
-if (TARGET_HARD_FLOAT && !rs6000_explicit_options.float_gprs)  \
-  rs6000_float_gprs = 1;   \
-if (rs6000_float_gprs != 0 && !rs6000_explicit_options.spe)\
-  rs6000_spe = 1;  \
-if (rs6000_spe && !rs6000_explicit_options.spe_abi)\
-  rs6000_spe_abi = 1;  \
-  }
\
+#define SUBSUBTARGET_OVERRIDE_OPTIONS \
+  do {\
+   if (TARGET_E500)   \
+  {   \
+if (TARGET_HARD_FLOAT && !global_options_set.x_rs6000_float_gprs) \
+  rs6000_float_gprs = 1;  \
+if (rs6000_float_gprs != 0 && !global_options_set.x_rs6000_spe)   \
+  rs6000_spe = 1; \
+if (rs6000_spe && !global_options_set.x_rs6000_spe_abi)   \
+  rs6000_spe_abi = 1; \
+  }   \
   } while(0)


Re: rs6000 options change for rtems.h

2011-12-02 Thread Joseph S. Myers
On Fri, 2 Dec 2011, Joel Sherrill wrote:

> On 12/02/2011 10:38 AM, Joseph S. Myers wrote:
> > On Fri, 2 Dec 2011, Joel Sherrill wrote:
> > 
> > > 2011-12-02  Joel Sherrill
> > > 
> > >  * config/rs6000/rtems.h: Switch to using global_options_set
> > >  in SUBSUBTARGET_OVERRIDE_OPTIONS.
> > Is it deliberate that you are removing the first part of each "if"
> > condition (thus, no longer checking TARGET_HARD_FLOAT before setting
> > rs6000_float_gprs, no longer checking rs6000_float_gprs before setting
> > rs6000_spe, etc.)?
> > 
> I patterned this after what was in other files.
> It is done this way everywhere it is referenced.
> 
> Should all of them be changed?

Not necessarily.

I described how I think this sort of logic should work in 
.  I think that means 
something closer to the other headers than to rtems.h - but I don't think 
a semantic change should be mixed with a change that's just supposed to 
get things to build again.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread Jeff Law
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 12/02/11 10:40, Michael Matz wrote:
> Hi,
> 
> On Fri, 2 Dec 2011, William J. Schmidt wrote:
> 
>> It seems like a fair amount of rip-up to avoid keeping the PHI
>> state around between blocks, so I just check to ensure the PHI
>> definitions occur in the same block before recording their
>> equivalence.
> 
> Then you should at least mix the BB number into the hash value (and
>  possibly also check it already in hashable_expr_equal_p) in order
> to reduce the number of collisions.
> 
> But I wonder why it's not enough to just do a push/pop sequence on
>  avail_exprs_stack around your new PHI processing in
> dom_opt_enter_block, ala
> 
> +  VEC_safe_push (expr_hash_elt_t, heap, avail_exprs_stack, NULL); 
> /* Create equivalences from redundant PHIs.  */ for (gsi =
> gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 
> eliminate_redundant_computations (&gsi); +
> remove_local_expressions_from_table ();
> 
> on top of your current version.  That ought to remove the added PHI
>  expressions (and only them) from the hash table but retain the
> information of equality in the const_or_copies_stack.  Checking the
> BB wouldn't be required then.
Sorry, I haven't been following this thread and there isn't much
discussion about what problem we're trying to solve using DOM within
the PR.

I see a mention of creating equivalences for redundant PHIs?  Are we
just trying to determine that two PHIs are going to result in the same
value?

jeff
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJO2RB6AAoJEBRtltQi2kC7niIIAJDgImG8IWhtIDjF7t7blUNj
uR8KCppurbvTkHgfuCSrn4hLRdRa14vZrY/FvP7pCaRmQ5KPBghu1IumXujVvb2i
bLtwZBggjox9mVnUjv5CizURAwJcmvPhJE5axTpEACrafzI+AuADNW8qQwO2MQmF
Ay3EXEPh27DbQi4E7IiytWQpuBsmFprh6Xu7nzW7YaK8zOGuGOEVdK5kDrZRPhLk
etq2AY4OISwClyXZHGhPqCsC4haxo80F8qzRVJ2c2EbxEMTu45CNm4fRNutR/pA4
Ly/d0WKs1YF4yTjMSEL6w5VTIFQNk1RyDAyh1OA/M01UAMWP8BHdr9tw01s4Bws=
=4X0z
-END PGP SIGNATURE-


Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread Michael Matz
Hi,

On Fri, 2 Dec 2011, William J. Schmidt wrote:

> It seems like a fair amount of rip-up to avoid keeping the PHI state 
> around between blocks, so I just check to ensure the PHI definitions 
> occur in the same block before recording their equivalence. 

Then you should at least mix the BB number into the hash value (and 
possibly also check it already in hashable_expr_equal_p) in order to 
reduce the number of collisions.

But I wonder why it's not enough to just do a push/pop sequence on 
avail_exprs_stack around your new PHI processing in dom_opt_enter_block, 
ala

+  VEC_safe_push (expr_hash_elt_t, heap, avail_exprs_stack, NULL);
   /* Create equivalences from redundant PHIs.  */
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 eliminate_redundant_computations (&gsi);
+  remove_local_expressions_from_table ();

on top of your current version.  That ought to remove the added PHI 
expressions (and only them) from the hash table but retain the information 
of equality in the const_or_copies_stack.  Checking the BB wouldn't be 
required then.
 

Ciao,
Michael.


Re: [Patch] Increase array sizes in vect-tests to enable 256-bit vectorization

2011-12-02 Thread Michael Zolotukhin
>
> Shouldn't we add a variant for each testcase so that we still
> exercise both 128-bit and 256-bit vectorization paths?

These tests are still good for testing 128-bit vectorization; the changes
were made just to make sure that 256-bit vectorization is possible on
the tests.

Actually, it's just the first step in enabling these tests for 256 bits -
for now many of them are failing if '-mavx' or '-mavx2' is specified
(mostly due to different diagnostic messages produced by the vectorizer),
but with the original (small) sizes of arrays we couldn't even check that.
When they are enabled, it'll be possible to use them for testing both
128- and 256-bit vectorization.

Michael


2011/12/2 Richard Guenther :
> 2011/12/2 Michael Zolotukhin :
>> Hi,
>>
>> This patch increases array sizes in tests from vect.exp suite, thus
>> enabling 256-bit vectorization where it's available.
>>
>> Ok for trunk?
>
> Shouldn't we add a variant for each testcase so that we still
> exercise both 128-bit and 256-bit vectorization paths?
>
>> Changelog:
>> 2011-12-02  Michael Zolotukhin  
>>
>>        * gcc.dg/vect/slp-13.c: Increase array size, add initialization.
>>        * gcc.dg/vect/slp-24.c: Ditto.
>>        * gcc.dg/vect/slp-3.c: Likewise and fix scans.
>>        * gcc.dg/vect/slp-34.c: Ditto.
>>        * gcc.dg/vect/slp-4.c: Ditto.
>>        * gcc.dg/vect/slp-cond-2.c: Ditto.
>>        * gcc.dg/vect/slp-multitypes-11.c: Ditto.
>>        * gcc.dg/vect/vect-1.c: Ditto.
>>        * gcc.dg/vect/vect-10.c: Ditto.
>>        * gcc.dg/vect/vect-105.c: Ditto.
>>        * gcc.dg/vect/vect-112.c: Ditto.
>>        * gcc.dg/vect/vect-15.c: Ditto.
>>        * gcc.dg/vect/vect-2.c: Ditto.
>>        * gcc.dg/vect/vect-31.c: Ditto.
>>        * gcc.dg/vect/vect-32.c: Ditto.
>>        * gcc.dg/vect/vect-33.c: Ditto.
>>        * gcc.dg/vect/vect-34.c: Ditto.
>>        * gcc.dg/vect/vect-35.c: Ditto.
>>        * gcc.dg/vect/vect-36.c: Ditto.
>>        * gcc.dg/vect/vect-6.c: Ditto.
>>        * gcc.dg/vect/vect-73.c: Ditto.
>>        * gcc.dg/vect/vect-74.c: Ditto.
>>        * gcc.dg/vect/vect-75.c: Ditto.
>>        * gcc.dg/vect/vect-76.c: Ditto.
>>        * gcc.dg/vect/vect-80.c: Ditto.
>>        * gcc.dg/vect/vect-85.c: Ditto.
>>        * gcc.dg/vect/vect-89.c: Ditto.
>>        * gcc.dg/vect/vect-97.c: Ditto.
>>        * gcc.dg/vect/vect-98.c: Ditto.
>>        * gcc.dg/vect/vect-all.c: Ditto.
>>        * gcc.dg/vect/vect-double-reduc-6.c: Ditto.
>>        * gcc.dg/vect/vect-iv-8.c: Ditto.
>>        * gcc.dg/vect/vect-iv-8a.c: Ditto.
>>        * gcc.dg/vect/vect-outer-1.c: Ditto.
>>        * gcc.dg/vect/vect-outer-1a.c: Ditto.
>>        * gcc.dg/vect/vect-outer-1b.c: Ditto.
>>        * gcc.dg/vect/vect-outer-2.c: Ditto.
>>        * gcc.dg/vect/vect-outer-2a.c: Ditto.
>>        * gcc.dg/vect/vect-outer-2c.c: Ditto.
>>        * gcc.dg/vect/vect-outer-3.c: Ditto.
>>        * gcc.dg/vect/vect-outer-3a.c: Ditto.
>>        * gcc.dg/vect/vect-outer-4a.c: Ditto.
>>        * gcc.dg/vect/vect-outer-4b.c: Ditto.
>>        * gcc.dg/vect/vect-outer-4c.c: Ditto.
>>        * gcc.dg/vect/vect-outer-4d.c: Ditto.
>>        * gcc.dg/vect/vect-outer-4m.c: Ditto.
>>        * gcc.dg/vect/vect-outer-fir-lb.c: Ditto.
>>        * gcc.dg/vect/vect-outer-fir.c: Ditto.
>>        * gcc.dg/vect/vect-over-widen-1.c: Ditto.
>>        * gcc.dg/vect/vect-over-widen-2.c: Ditto.
>>        * gcc.dg/vect/vect-over-widen-3.c: Ditto.
>>        * gcc.dg/vect/vect-over-widen-4.c: Ditto.
>>        * gcc.dg/vect/vect-reduc-1char.c: Ditto.
>>        * gcc.dg/vect/vect-reduc-2char.c: Ditto.
>>        * gcc.dg/vect/vect-reduc-pattern-1b.c: Ditto.
>>        * gcc.dg/vect/vect-reduc-pattern-1c.c: Ditto.
>>        * gcc.dg/vect/vect-reduc-pattern-2b.c: Ditto.
>>        * gcc.dg/vect/vect-shift-2.c: Ditto.
>>        * gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Ditto.
>>        * gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Ditto.
>>        * gcc.dg/vect/vect-strided-u8-i8-gap2.c: Ditto.
>>        * gcc.dg/vect/vect-strided-u8-i8-gap4.c: Ditto.
>>        * gcc.dg/vect/vect-strided-u8-i8-gap7.c: Ditto.
>>
>> --
>> ---
>> Best regards,
>> Michael V. Zolotukhin,
>> Software Engineer
>> Intel Corporation.


Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread William J. Schmidt
Sorry for the previous brain-fart!  Here's a revised patch.

It seems like a fair amount of rip-up to avoid keeping the PHI state
around between blocks, so I just check to ensure the PHI definitions
occur in the same block before recording their equivalence.
lookup_avail_expr may return a constant when a PHI has been proven
equivalent to a constant; I skip these since there's nothing further
that needs to be done with them.

Bootstrapped and regression tested on powerpc64-linux.  Ok for trunk?

Thanks,
Bill


2011-12-02  Bill Schmidt  

PR middle-end/39976
* tree-ssa-dom.c (enum expr_kind): Add EXPR_PHI.
(struct hashable_expr): Add struct phi field.
(initialize_hash_element): Handle phis.
(hashable_expr_equal_p): Likewise.
(iterative_hash_hashable_expr): Likewise.
(print_expr_hash_elt): Likewise.
(dom_opt_enter_block): Create equivalences from redundant phis.
(eliminate_redundant_computations): Handle redundant phis.
(lookup_avail_expr): Handle phis.


Index: gcc/tree-ssa-dom.c
===
--- gcc/tree-ssa-dom.c  (revision 181928)
+++ gcc/tree-ssa-dom.c  (working copy)
@@ -52,7 +52,8 @@ enum expr_kind
   EXPR_UNARY,
   EXPR_BINARY,
   EXPR_TERNARY,
-  EXPR_CALL
+  EXPR_CALL,
+  EXPR_PHI
 };
 
 struct hashable_expr
@@ -65,6 +66,7 @@ struct hashable_expr
 struct { enum tree_code op;  tree opnd0, opnd1; } binary;
 struct { enum tree_code op;  tree opnd0, opnd1, opnd2; } ternary;
 struct { gimple fn_from; bool pure; size_t nargs; tree *args; } call;
+struct { size_t nargs; tree *args; } phi;
   } ops;
 };
 
@@ -281,6 +283,19 @@ initialize_hash_element (gimple stmt, tree lhs,
   expr->kind = EXPR_SINGLE;
   expr->ops.single.rhs = gimple_goto_dest (stmt);
 }
+  else if (code == GIMPLE_PHI)
+{
+  size_t nargs = gimple_phi_num_args (stmt);
+  size_t i;
+
+  expr->type = TREE_TYPE (gimple_phi_result (stmt));
+  expr->kind = EXPR_PHI;
+  expr->ops.phi.nargs = nargs;
+  expr->ops.phi.args = (tree *) xcalloc (nargs, sizeof (tree));
+
+  for (i = 0; i < nargs; i++)
+expr->ops.phi.args[i] = gimple_phi_arg_def (stmt, i);
+}
   else
 gcc_unreachable ();
 
@@ -439,6 +454,21 @@ hashable_expr_equal_p (const struct hashable_expr
 return true;
   }
 
+case EXPR_PHI:
+  {
+size_t i;
+
+if (expr0->ops.phi.nargs !=  expr1->ops.phi.nargs)
+  return false;
+
+for (i = 0; i < expr0->ops.phi.nargs; i++)
+  if (! operand_equal_p (expr0->ops.phi.args[i],
+ expr1->ops.phi.args[i], 0))
+return false;
+
+return true;
+  }
+
 default:
   gcc_unreachable ();
 }
@@ -516,6 +546,15 @@ iterative_hash_hashable_expr (const struct hashabl
   }
   break;
 
+case EXPR_PHI:
+  {
+size_t i;
+
+for (i = 0; i < expr->ops.phi.nargs; i++)
+  val = iterative_hash_expr (expr->ops.phi.args[i], val);
+  }
+  break;
+
 default:
   gcc_unreachable ();
 }
@@ -588,6 +627,22 @@ print_expr_hash_elt (FILE * stream, const struct e
   fprintf (stream, ")");
 }
 break;
+
+  case EXPR_PHI:
+{
+  size_t i;
+  size_t nargs = element->expr.ops.phi.nargs;
+
+  fprintf (stream, "PHI <");
+  for (i = 0; i < nargs; i++)
+{
+  print_generic_expr (stream, element->expr.ops.phi.args[i], 0);
+  if (i + 1 < nargs)
+fprintf (stream, ", ");
+}
+  fprintf (stream, ">");
+}
+break;
 }
   fprintf (stream, "\n");
 
@@ -1688,6 +1743,10 @@ dom_opt_enter_block (struct dom_walk_data *walk_da
   /* PHI nodes can create equivalences too.  */
   record_equivalences_from_phis (bb);
 
+  /* Create equivalences from redundant PHIs.  */
+  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+eliminate_redundant_computations (&gsi);
+
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 optimize_stmt (bb, gsi);
 
@@ -1818,12 +1877,16 @@ eliminate_redundant_computations (gimple_stmt_iter
 {
   tree expr_type;
   tree cached_lhs;
+  tree def;
   bool insert = true;
   bool assigns_var_p = false;
 
   gimple stmt = gsi_stmt (*gsi);
 
-  tree def = gimple_get_lhs (stmt);
+  if (gimple_code (stmt) == GIMPLE_PHI)
+def = gimple_phi_result (stmt);
+  else
+def = gimple_get_lhs (stmt);
 
   /* Certain expressions on the RHS can be optimized away, but can not
  themselves be entered into the hash tables.  */
@@ -1857,6 +1920,21 @@ eliminate_redundant_computations (gimple_stmt_iter
 }
   else if (gimple_code (stmt) == GIMPLE_SWITCH)
 expr_type = TREE_TYPE (gimple_switch_index (stmt));
+  else if (gimple_code (stmt) == GIMPLE_PHI)
+/* We can't propagate into a phi, so the logic below doesn't apply.
+

[v3] Fix libstdc++/51288 thinko

2011-12-02 Thread Paolo Carlini

Hi,

this fixes a thinko in my recent fix for the PR: just be consistent with 
our other inserters.


Testing x86_64-linux, will commit soon.

Thanks,
Paolo.

///
2011-12-02  Paolo Carlini  

* include/std/iomanip (put_money): Fix thinko, use __err local,
like in, eg, basic_ostream::_M_insert.
Index: include/std/iomanip
===
--- include/std/iomanip (revision 181928)
+++ include/std/iomanip (working copy)
@@ -282,7 +282,7 @@
}
  __catch(...)
{ __is._M_setstate(ios_base::badbit); }
- if (ios_base::goodbit != __err)
+ if (__err)
__is.setstate(__err);
}
   return __is; 
@@ -312,15 +312,16 @@
   typename basic_ostream<_CharT, _Traits>::sentry __cerb(__os);
   if (__cerb)
{
+ ios_base::iostate __err = ios_base::goodbit;
  __try
{
  typedef ostreambuf_iterator<_CharT, _Traits>   _Iter;
  typedef money_put<_CharT, _Iter>   _MoneyPut;
+
  const _MoneyPut& __mp = use_facet<_MoneyPut>(__os.getloc());
- const _Iter __end = __mp.put(_Iter(__os.rdbuf()), __f._M_intl,
-  __os, __os.fill(), __f._M_mon);
- if (__end.failed())
-   __os.setstate(ios_base::badbit);
+ if (__mp.put(_Iter(__os.rdbuf()), __f._M_intl, __os,
+  __os.fill(), __f._M_mon).failed())
+   __err |= ios_base::badbit;
}
  __catch(__cxxabiv1::__forced_unwind&)
{
@@ -329,6 +330,8 @@
}
  __catch(...)
{ __os._M_setstate(ios_base::badbit); }
+ if (__err)
+   __os.setstate(__err);
}
   return __os; 
 }


Re: [Patch] Increase array sizes in vect-tests to enable 256-bit vectorization

2011-12-02 Thread Ira Rosen

gcc-patches-ow...@gcc.gnu.org wrote on 02/12/2011 06:23:25 PM:

> Hi,
>
> This patch increases array sizes in tests from vect.exp suite, thus
> enabling 256-bit vectorization where it's available.
>
> Ok for trunk?

--- a/gcc/testsuite/gcc.dg/vect/slp-24.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-24.c
...
@@ -13,14 +12,17 @@ typedef struct {
unsigned char d;
 } s;

-unsigned char ub[N*2] =
{1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
-unsigned char uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+unsigned char ub[N*2];
+unsigned char uc[N];
+
+volatile int y = 0;
+unsigned char check_diff = 0;

 void
 main1 (unsigned char x, unsigned char max_result, unsigned char
min_result, s *arr)
 {
   int i;
-  unsigned char udiff = 2;
+  unsigned char udiff = 0;

Please don't change initial values to 0, we want to check that everything
works fine for non-zeros as well.
There are several other occasions in the patch.

Thanks,
Ira

>
> Changelog:
> 2011-12-02  Michael Zolotukhin  
>
>* gcc.dg/vect/slp-13.c: Increase array size, add initialization.
>* gcc.dg/vect/slp-24.c: Ditto.
>* gcc.dg/vect/slp-3.c: Likewise and fix scans.
>* gcc.dg/vect/slp-34.c: Ditto.
>* gcc.dg/vect/slp-4.c: Ditto.
>* gcc.dg/vect/slp-cond-2.c: Ditto.
>* gcc.dg/vect/slp-multitypes-11.c: Ditto.
>* gcc.dg/vect/vect-1.c: Ditto.
>* gcc.dg/vect/vect-10.c: Ditto.
>* gcc.dg/vect/vect-105.c: Ditto.
>* gcc.dg/vect/vect-112.c: Ditto.
>* gcc.dg/vect/vect-15.c: Ditto.
>* gcc.dg/vect/vect-2.c: Ditto.
>* gcc.dg/vect/vect-31.c: Ditto.
>* gcc.dg/vect/vect-32.c: Ditto.
>* gcc.dg/vect/vect-33.c: Ditto.
>* gcc.dg/vect/vect-34.c: Ditto.
>* gcc.dg/vect/vect-35.c: Ditto.
>* gcc.dg/vect/vect-36.c: Ditto.
>* gcc.dg/vect/vect-6.c: Ditto.
>* gcc.dg/vect/vect-73.c: Ditto.
>* gcc.dg/vect/vect-74.c: Ditto.
>* gcc.dg/vect/vect-75.c: Ditto.
>* gcc.dg/vect/vect-76.c: Ditto.
>* gcc.dg/vect/vect-80.c: Ditto.
>* gcc.dg/vect/vect-85.c: Ditto.
>* gcc.dg/vect/vect-89.c: Ditto.
>* gcc.dg/vect/vect-97.c: Ditto.
>* gcc.dg/vect/vect-98.c: Ditto.
>* gcc.dg/vect/vect-all.c: Ditto.
>* gcc.dg/vect/vect-double-reduc-6.c: Ditto.
>* gcc.dg/vect/vect-iv-8.c: Ditto.
>* gcc.dg/vect/vect-iv-8a.c: Ditto.
>* gcc.dg/vect/vect-outer-1.c: Ditto.
>* gcc.dg/vect/vect-outer-1a.c: Ditto.
>* gcc.dg/vect/vect-outer-1b.c: Ditto.
>* gcc.dg/vect/vect-outer-2.c: Ditto.
>* gcc.dg/vect/vect-outer-2a.c: Ditto.
>* gcc.dg/vect/vect-outer-2c.c: Ditto.
>* gcc.dg/vect/vect-outer-3.c: Ditto.
>* gcc.dg/vect/vect-outer-3a.c: Ditto.
>* gcc.dg/vect/vect-outer-4a.c: Ditto.
>* gcc.dg/vect/vect-outer-4b.c: Ditto.
>* gcc.dg/vect/vect-outer-4c.c: Ditto.
>* gcc.dg/vect/vect-outer-4d.c: Ditto.
>* gcc.dg/vect/vect-outer-4m.c: Ditto.
>* gcc.dg/vect/vect-outer-fir-lb.c: Ditto.
>* gcc.dg/vect/vect-outer-fir.c: Ditto.
>* gcc.dg/vect/vect-over-widen-1.c: Ditto.
>* gcc.dg/vect/vect-over-widen-2.c: Ditto.
>* gcc.dg/vect/vect-over-widen-3.c: Ditto.
>* gcc.dg/vect/vect-over-widen-4.c: Ditto.
>* gcc.dg/vect/vect-reduc-1char.c: Ditto.
>* gcc.dg/vect/vect-reduc-2char.c: Ditto.
>* gcc.dg/vect/vect-reduc-pattern-1b.c: Ditto.
>* gcc.dg/vect/vect-reduc-pattern-1c.c: Ditto.
>* gcc.dg/vect/vect-reduc-pattern-2b.c: Ditto.
>* gcc.dg/vect/vect-shift-2.c: Ditto.
>* gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Ditto.
>* gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Ditto.
>* gcc.dg/vect/vect-strided-u8-i8-gap2.c: Ditto.
>* gcc.dg/vect/vect-strided-u8-i8-gap4.c: Ditto.
>* gcc.dg/vect/vect-strided-u8-i8-gap7.c: Ditto.
>
> --
> ---
> Best regards,
> Michael V. Zolotukhin,
> Software Engineer
> Intel Corporation.
> [attachment "vect_tests.patch" deleted by Ira Rosen/Haifa/IBM]



Re: rs6000 options change for rtems.h

2011-12-02 Thread Joel Sherrill

On 12/02/2011 10:38 AM, Joseph S. Myers wrote:

On Fri, 2 Dec 2011, Joel Sherrill wrote:


2011-12-02  Joel Sherrill

 * config/rs6000/rtems.h: Switch to using global_options_set
 in SUBSUBTARGET_OVERRIDE_OPTIONS.

Is it deliberate that you are removing the first part of each "if"
condition (thus, no longer checking TARGET_HARD_FLOAT before setting
rs6000_float_gprs, no longer checking rs6000_float_gprs before setting
rs6000_spe, etc.)?


I patterned this after what was in other files.
It is done this way everywhere it is referenced.

Should all of them be changed?

$ grep global_options_set.x_rs6000_float_gprs *
e500-double.h:  if (!global_options_set.x_rs6000_float_gprs) \
eabispe.h:  if (!global_options_set.x_rs6000_float_gprs) \
linuxspe.h:  if (!global_options_set.x_rs6000_float_gprs) \
rs6000.c: if (!global_options_set.x_rs6000_float_gprs)
rtems.h:if (!global_options_set.x_rs6000_float_gprs) \

--
Joel Sherrill, Ph.D. Director of Research&  Development
joel.sherr...@oarcorp.comOn-Line Applications Research
Ask me about RTEMS: a free RTOS  Huntsville AL 35805
   Support Available (256) 722-9985




Re: rs6000 options change for rtems.h

2011-12-02 Thread Joseph S. Myers
On Fri, 2 Dec 2011, Joel Sherrill wrote:

> 2011-12-02  Joel Sherrill 
> 
> * config/rs6000/rtems.h: Switch to using global_options_set
> in SUBSUBTARGET_OVERRIDE_OPTIONS.

Is it deliberate that you are removing the first part of each "if" 
condition (thus, no longer checking TARGET_HARD_FLOAT before setting 
rs6000_float_gprs, no longer checking rs6000_float_gprs before setting 
rs6000_spe, etc.)?

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH][2/2] Remove gimple_call_cannot_inline

2011-12-02 Thread Richard Guenther

This is the 2nd piece of the stmt flag removal.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

I'll apply the short series tomorrow unless there are any objections
or comments.

Thanks,
Richard.

2011-12-02  Richard Guenther  

* cgraph.c (cgraph_create_edge_1): Initialize
call_stmt_cannot_inline_p from the stmt if possible.
(cgraph_make_edge_direct): Likewise.
* gimple-streamer-in.c (input_gimple_stmt): Do not
call gimple_call_set_cannot_inline.
* gimple.h (enum gf_mask): Remove GF_CALL_CANNOT_INLINE, shift
values.
(gimple_call_set_cannot_inline): Remove.
(gimple_call_cannot_inline_p): Likewise.
* ipa-inline-analysis.c (initialize_inline_failed): Look
at the edge call_stmt_cannot_inline_p flag.
* ipa-inline.c (can_inline_edge_p): Likewise.
(early_inliner): Only update the edge flag.
* ipa-prop.c (update_indirect_edges_after_inlining): Likewise.
(ipa_modify_call_arguments): Do not call gimple_call_set_cannot_inline.
* cgraphunit.c (assemble_thunk): Likewise.
* gimple-fold.c (gimple_fold_call): Likewise.

Index: trunk/gcc/cgraph.c
===
*** trunk.orig/gcc/cgraph.c 2011-12-02 16:34:42.0 +0100
--- trunk/gcc/cgraph.c  2011-12-02 16:39:50.0 +0100
*** cgraph_create_edge_1 (struct cgraph_node
*** 988,995 
edge->can_throw_external
  = call_stmt ? stmt_can_throw_external (call_stmt) : false;
pop_cfun ();
!   edge->call_stmt_cannot_inline_p =
! (call_stmt ? gimple_call_cannot_inline_p (call_stmt) : false);
if (call_stmt && caller->call_site_hash)
  cgraph_add_edge_to_call_site_hash (edge);
  
--- 988,999 
edge->can_throw_external
  = call_stmt ? stmt_can_throw_external (call_stmt) : false;
pop_cfun ();
!   if (call_stmt
!   && callee && callee->decl
!   && !gimple_check_call_matching_types (call_stmt, callee->decl))
! edge->call_stmt_cannot_inline_p = true;
!   else
! edge->call_stmt_cannot_inline_p = false;
if (call_stmt && caller->call_site_hash)
  cgraph_add_edge_to_call_site_hash (edge);
  
*** cgraph_make_edge_direct (struct cgraph_e
*** 1184,1195 
/* Insert to callers list of the new callee.  */
cgraph_set_edge_callee (edge, callee);
  
!   if (edge->call_stmt
!   && !gimple_check_call_matching_types (edge->call_stmt, callee->decl))
! {
!   gimple_call_set_cannot_inline (edge->call_stmt, true);
!   edge->call_stmt_cannot_inline_p = true;
! }
  
/* We need to re-determine the inlining status of the edge.  */
initialize_inline_failed (edge);
--- 1188,1196 
/* Insert to callers list of the new callee.  */
cgraph_set_edge_callee (edge, callee);
  
!   if (edge->call_stmt)
! edge->call_stmt_cannot_inline_p
!   = !gimple_check_call_matching_types (edge->call_stmt, callee->decl);
  
/* We need to re-determine the inlining status of the edge.  */
initialize_inline_failed (edge);
Index: trunk/gcc/gimple-streamer-in.c
===
*** trunk.orig/gcc/gimple-streamer-in.c 2011-12-02 16:34:42.0 +0100
--- trunk/gcc/gimple-streamer-in.c  2011-12-02 17:02:35.0 +0100
*** input_gimple_stmt (struct lto_input_bloc
*** 219,236 
}
if (is_gimple_call (stmt))
{
- tree fndecl;
  if (gimple_call_internal_p (stmt))
gimple_call_set_internal_fn
  (stmt, streamer_read_enum (ib, internal_fn, IFN_LAST));
  else
gimple_call_set_fntype (stmt, stream_read_tree (ib, data_in));
- /* Update the non-inlinable flag conservatively.  */
- fndecl = gimple_call_fndecl (stmt);
- if (fndecl
- && !gimple_call_cannot_inline_p (stmt)
- && !gimple_check_call_matching_types (stmt, fndecl))
-   gimple_call_set_cannot_inline (stmt, true);
}
break;
  
--- 219,229 
Index: trunk/gcc/gimple.h
===
*** trunk.orig/gcc/gimple.h 2011-12-02 16:34:42.0 +0100
--- trunk/gcc/gimple.h  2011-12-02 16:35:07.0 +0100
*** enum gimple_rhs_class
*** 97,110 
  enum gf_mask {
  GF_ASM_INPUT  = 1 << 0,
  GF_ASM_VOLATILE   = 1 << 1,
! GF_CALL_CANNOT_INLINE = 1 << 0,
! GF_CALL_FROM_THUNK= 1 << 1,
! GF_CALL_RETURN_SLOT_OPT   = 1 << 2,
! GF_CALL_TAILCALL  = 1 << 3,
! GF_CALL_VA_ARG_PACK   = 1 << 4,
! GF_CALL_NOTHROW   = 1 << 5,
! GF_CALL_ALLOCA_FOR_VAR= 1 << 6,
! GF_CALL_INTERNAL  = 1 << 7,
  GF_OMP_PARALLEL_COMBINED  = 1 << 0,
  
  /* True on an GIMPLE_OMP_RETURN statement if the return does not require
--- 97,109 
  enum gf_m

Re: [Patch] Increase array sizes in vect-tests to enable 256-bit vectorization

2011-12-02 Thread Richard Guenther
2011/12/2 Michael Zolotukhin :
> Hi,
>
> This patch increases array sizes in tests from vect.exp suite, thus
> enabling 256-bit vectorization where it's available.
>
> Ok for trunk?

Shouldn't we add a variant for each testcase so that we still
exercise both 128-bit and 256-bit vectorization paths?

> Changelog:
> 2011-12-02  Michael Zolotukhin  
>
>        * gcc.dg/vect/slp-13.c: Increase array size, add initialization.
>        * gcc.dg/vect/slp-24.c: Ditto.
>        * gcc.dg/vect/slp-3.c: Likewise and fix scans.
>        * gcc.dg/vect/slp-34.c: Ditto.
>        * gcc.dg/vect/slp-4.c: Ditto.
>        * gcc.dg/vect/slp-cond-2.c: Ditto.
>        * gcc.dg/vect/slp-multitypes-11.c: Ditto.
>        * gcc.dg/vect/vect-1.c: Ditto.
>        * gcc.dg/vect/vect-10.c: Ditto.
>        * gcc.dg/vect/vect-105.c: Ditto.
>        * gcc.dg/vect/vect-112.c: Ditto.
>        * gcc.dg/vect/vect-15.c: Ditto.
>        * gcc.dg/vect/vect-2.c: Ditto.
>        * gcc.dg/vect/vect-31.c: Ditto.
>        * gcc.dg/vect/vect-32.c: Ditto.
>        * gcc.dg/vect/vect-33.c: Ditto.
>        * gcc.dg/vect/vect-34.c: Ditto.
>        * gcc.dg/vect/vect-35.c: Ditto.
>        * gcc.dg/vect/vect-36.c: Ditto.
>        * gcc.dg/vect/vect-6.c: Ditto.
>        * gcc.dg/vect/vect-73.c: Ditto.
>        * gcc.dg/vect/vect-74.c: Ditto.
>        * gcc.dg/vect/vect-75.c: Ditto.
>        * gcc.dg/vect/vect-76.c: Ditto.
>        * gcc.dg/vect/vect-80.c: Ditto.
>        * gcc.dg/vect/vect-85.c: Ditto.
>        * gcc.dg/vect/vect-89.c: Ditto.
>        * gcc.dg/vect/vect-97.c: Ditto.
>        * gcc.dg/vect/vect-98.c: Ditto.
>        * gcc.dg/vect/vect-all.c: Ditto.
>        * gcc.dg/vect/vect-double-reduc-6.c: Ditto.
>        * gcc.dg/vect/vect-iv-8.c: Ditto.
>        * gcc.dg/vect/vect-iv-8a.c: Ditto.
>        * gcc.dg/vect/vect-outer-1.c: Ditto.
>        * gcc.dg/vect/vect-outer-1a.c: Ditto.
>        * gcc.dg/vect/vect-outer-1b.c: Ditto.
>        * gcc.dg/vect/vect-outer-2.c: Ditto.
>        * gcc.dg/vect/vect-outer-2a.c: Ditto.
>        * gcc.dg/vect/vect-outer-2c.c: Ditto.
>        * gcc.dg/vect/vect-outer-3.c: Ditto.
>        * gcc.dg/vect/vect-outer-3a.c: Ditto.
>        * gcc.dg/vect/vect-outer-4a.c: Ditto.
>        * gcc.dg/vect/vect-outer-4b.c: Ditto.
>        * gcc.dg/vect/vect-outer-4c.c: Ditto.
>        * gcc.dg/vect/vect-outer-4d.c: Ditto.
>        * gcc.dg/vect/vect-outer-4m.c: Ditto.
>        * gcc.dg/vect/vect-outer-fir-lb.c: Ditto.
>        * gcc.dg/vect/vect-outer-fir.c: Ditto.
>        * gcc.dg/vect/vect-over-widen-1.c: Ditto.
>        * gcc.dg/vect/vect-over-widen-2.c: Ditto.
>        * gcc.dg/vect/vect-over-widen-3.c: Ditto.
>        * gcc.dg/vect/vect-over-widen-4.c: Ditto.
>        * gcc.dg/vect/vect-reduc-1char.c: Ditto.
>        * gcc.dg/vect/vect-reduc-2char.c: Ditto.
>        * gcc.dg/vect/vect-reduc-pattern-1b.c: Ditto.
>        * gcc.dg/vect/vect-reduc-pattern-1c.c: Ditto.
>        * gcc.dg/vect/vect-reduc-pattern-2b.c: Ditto.
>        * gcc.dg/vect/vect-shift-2.c: Ditto.
>        * gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Ditto.
>        * gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Ditto.
>        * gcc.dg/vect/vect-strided-u8-i8-gap2.c: Ditto.
>        * gcc.dg/vect/vect-strided-u8-i8-gap4.c: Ditto.
>        * gcc.dg/vect/vect-strided-u8-i8-gap7.c: Ditto.
>
> --
> ---
> Best regards,
> Michael V. Zolotukhin,
> Software Engineer
> Intel Corporation.


rs6000 options change for rtems.h

2011-12-02 Thread Joel Sherrill

Hi,

I have been testing with this for almost a month.  It is
my attempt to follow the changes I think Joseph made
to other rs6000 targets. If this change looks right,
I would like to commit it.

Test results have been posted for it.

Thanks.

2011-12-02  Joel Sherrill 

* config/rs6000/rtems.h: Switch to using global_options_set
in SUBSUBTARGET_OVERRIDE_OPTIONS.


--
Joel Sherrill, Ph.D. Director of Research&  Development
joel.sherr...@oarcorp.comOn-Line Applications Research
Ask me about RTEMS: a free RTOS  Huntsville AL 35805
   Support Available (256) 722-9985


Index: gcc/config/rs6000/rtems.h
===
--- gcc/config/rs6000/rtems.h   (revision 181924)
+++ gcc/config/rs6000/rtems.h   (working copy)
@@ -57,15 +57,15 @@
   { "cpp_os_rtems",CPP_OS_RTEMS_SPEC }
 
 #undef SUBSUBTARGET_OVERRIDE_OPTIONS
-#define SUBSUBTARGET_OVERRIDE_OPTIONS  \
-  do { \
-if (TARGET_E500)   \
-  {
\
-if (TARGET_HARD_FLOAT && !rs6000_explicit_options.float_gprs)  \
-  rs6000_float_gprs = 1;   \
-if (rs6000_float_gprs != 0 && !rs6000_explicit_options.spe)\
-  rs6000_spe = 1;  \
-if (rs6000_spe && !rs6000_explicit_options.spe_abi)\
-  rs6000_spe_abi = 1;  \
-  }
\
+#define SUBSUBTARGET_OVERRIDE_OPTIONS   \
+  do {  \
+   if (TARGET_E500)  \
+  {  \
+if (!global_options_set.x_rs6000_float_gprs) \
+  rs6000_float_gprs = 1; \
+if (!global_options_set.x_rs6000_spe)\
+  rs6000_spe = 1;\
+if (!global_options_set.x_rs6000_spe_abi)\
+  rs6000_spe_abi = 1;\
+  }  \
   } while(0)


[PATCH] PRs c++/51239, c++/51180 - Better support for unbound alias template specialization

2011-12-02 Thread Dodji Seketeli
Hello,

I have conflated the handling of two PRs here, because I think they
are related.

Consider this short example that illustrates the issue of PR
c++/51239:

struct S {};

template
using head = T;

template<class... Ts>
using x = head<Ts...>;//#1

In #1, we want to be able to represent 'head<Ts...>' in such a way
that the Ts... argument is not substituted for the parameter of the
"head" template, because the pack expansion Ts... means that we don't
yet have the proper arguments to "apply" to the head template.  Later,
when we have those arguments, for instance:

x<int, char> i0;

we can proceed with substituting the argument pack [int, char] into
the pack expansion Ts... to get a set of arguments {int, char} that
we'll apply to the head template, to get "int".

To date we don't have such an "unbound alias template specialization"
representation, because we rely on the fact that "head" is an
alias template to substitute its arguments into its underlying type
directly.  Doing that in the present case (the argument being a pack
expansion) just wreaks havoc, as the PR can attest.

After talking with you offline, we settled on using the existing
BOUND_TEMPLATE_TEMPLATE_PARM tree to represent this new construct.
The alias template and its unbound arguments are stored in the
TYPE_TEMPLATE_INFO of the tree and its TYPE_NAME has the
TYPE_DECL_ALIAS_P flag set.

So now, during the process of building a template specialization,
coerce_template_parms detects that we don't have all the arguments yet
- that is, when one of the arguments is a pack expansion.  In that
case lookup_template_class_1, if the template is an alias template,
builds an unbound alias template specialization and returns it.

We also support substituting for an unbound alias template
specialization.

The problem in PR c++/51180 is that sometimes coerce_template_parms
won't let us build a type "foo" if the template foo has more
than one parameter and no parameter pack.  We then end up in the "if"
below, and error out:

  if ((nargs > nparms && !variadic_p)
  || (nargs < nparms - variadic_p 
  && require_all_args
  && (!use_default_args
  || (TREE_VEC_ELT (parms, nargs) != error_mark_node
  && !TREE_PURPOSE (TREE_VEC_ELT (parms, nargs))
{

So the patch below tries to fix that as well.

Incidentally, I noticed that the test g++.dg/cpp0x/alias-decl-15.C
should actually be considered valid.  I wrongly thought otherwise at
that time.  I have thus adjusted that test case accordingly.

Bootstrapped and tested on x86_64-unknown-linux-gnu against trunk.

gcc/cp/

PR c++/51239
PR c++/51180
* cp-tree.h (UNBOUND_ALIAS_TEMPLATE_P): New
predicate.
* pt.c (build_unbound_alias_template): New.
(coerce_template_parms): Take a new out parameter flag about if
the actual number of arguments is unknown.  Make the template
unbound if one of its arguments is a pack expansion.
(lookup_template_class_1): Adjust for new argument to
coerce_template_parms.  Build an unbound alias template if the
number of arguments is not known yet.
(tsubst): Handle unbound alias
templates.
(fn_type_unification, get_bindings, most_specialized_class):
Adjust for new argument to coerce_template_parms.

gcc/testsuite/

PR c++/51239
PR c++/51180
* g++.dg/cpp0x/alias-decl-18.C: New test.
* g++.dg/cpp0x/alias-decl-19.C: Likewise.
* g++.dg/cpp0x/alias-decl-15.C: This was wrongly expected
to fail before.  Adjust accordingly.
---
 gcc/cp/cp-tree.h   |   10 +++
 gcc/cp/pt.c|  108 +---
 gcc/testsuite/g++.dg/cpp0x/alias-decl-15.C |   10 ++--
 gcc/testsuite/g++.dg/cpp0x/alias-decl-18.C |   29 
 gcc/testsuite/g++.dg/cpp0x/alias-decl-19.C |   19 +
 5 files changed, 161 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-18.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-19.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 3f4f408..0dcebd6 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -3662,6 +3662,16 @@ more_aggr_init_expr_args_p (const 
aggr_init_expr_arg_iterator *iter)
   (DECL_TYPE_TEMPLATE_P (NODE) \
&& !DECL_ARTIFICIAL (DECL_TEMPLATE_RESULT (NODE)))
 
+/* Nonzero for a node representing an alias template specialization in
+   which the arguments are not yet applied to the alias template.
+   This is used in cases where the arguments are not all fully known
+   yet, and can be applied later when they are.  */
+#define UNBOUND_ALIAS_TEMPLATE_P(NODE) \
+  ((NODE)  \
+   && TREE_CODE (NODE) == BOUND_TEMPLATE_TEMPLATE_PARM \
+   && DECL_ALIAS_TEMPLATE_P (TYPE_TI_TEMPLATE (NODE))  \
+   && TYPE_DECL_ALIAS_P (TYPE_NAM

[PATCH][1/2] Remove CALL_CANNOT_INLINE_P

2011-12-02 Thread Richard Guenther

This removes the CALL_CANNOT_INLINE_P tree flag, as discussed in
the thread about the duplicate edge/gimple stmt flag.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2011-12-02  Richard Guenther  

* tree.h (CALL_CANNOT_INLINE_P): Remove.
* tree-mudflap.c (mf_xform_statements): Do not modify alloca calls.
* builtins.c (expand_builtin_alloca): With -fmudflap do not expand
alloca calls inline.
* cfgexpand.c (expand_call_stmt): Do not set CALL_CANNOT_INLINE_P.
* gimple.c (gimple_build_call_from_tree): Do not read
CALL_CANNOT_INLINE_P.
* gimplify.c (gimplify_call_expr): Do not copy CALL_CANNOT_INLINE_P.

Index: gcc/tree.h
===
*** gcc/tree.h  (revision 181902)
--- gcc/tree.h  (working copy)
*** struct GTY(()) tree_common {
*** 533,541 
 CASE_HIGH_SEEN in
 CASE_LABEL_EXPR
  
-CALL_CANNOT_INLINE_P in
-CALL_EXPR
-  
 ENUM_IS_SCOPED in
   ENUMERAL_TYPE
  
--- 533,538 
*** extern void omp_clause_range_check_faile
*** 1245,1253 
  #define CASE_HIGH_SEEN(NODE) \
(CASE_LABEL_EXPR_CHECK (NODE)->base.static_flag)
  
- /* Used to mark a CALL_EXPR as not suitable for inlining.  */
- #define CALL_CANNOT_INLINE_P(NODE) (CALL_EXPR_CHECK (NODE)->base.static_flag)
- 
  /* Used to mark scoped enums.  */
  #define ENUM_IS_SCOPED(NODE) (ENUMERAL_TYPE_CHECK (NODE)->base.static_flag)
  
--- 1242,1247 
Index: gcc/tree-mudflap.c
===
*** gcc/tree-mudflap.c  (revision 181902)
--- gcc/tree-mudflap.c  (working copy)
*** mf_xform_derefs_1 (gimple_stmt_iterator
*** 929,935 
  }
  /* Transform
 1) Memory references.
-2) BUILTIN_ALLOCA calls.
  */
  static void
  mf_xform_statements (void)
--- 929,934 
*** mf_xform_statements (void)
*** 970,985 
  }
break;
  
- case GIMPLE_CALL:
-   {
- tree fndecl = gimple_call_fndecl (s);
- if (fndecl && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA
-  || (DECL_FUNCTION_CODE (fndecl)
-  == BUILT_IN_ALLOCA_WITH_ALIGN)))
-   gimple_call_set_cannot_inline (s, true);
-   }
-   break;
- 
  default:
;
  }
--- 969,974 
Index: gcc/builtins.c
===
*** gcc/builtins.c  (revision 181902)
--- gcc/builtins.c  (working copy)
*** expand_builtin_alloca (tree exp, bool ca
*** 4523,4530 
bool alloca_with_align = (DECL_FUNCTION_CODE (get_callee_fndecl (exp))
== BUILT_IN_ALLOCA_WITH_ALIGN);
  
!   /* Emit normal call if marked not-inlineable.  */
!   if (CALL_CANNOT_INLINE_P (exp))
  return NULL_RTX;
  
valid_arglist
--- 4523,4530 
bool alloca_with_align = (DECL_FUNCTION_CODE (get_callee_fndecl (exp))
== BUILT_IN_ALLOCA_WITH_ALIGN);
  
!   /* Emit normal call if we use mudflap.  */
!   if (flag_mudflap)
  return NULL_RTX;
  
valid_arglist
Index: gcc/cfgexpand.c
===
*** gcc/cfgexpand.c (revision 181902)
--- gcc/cfgexpand.c (working copy)
*** expand_call_stmt (gimple stmt)
*** 2050,2056 
  CALL_ALLOCA_FOR_VAR_P (exp) = gimple_call_alloca_for_var_p (stmt);
else
  CALL_FROM_THUNK_P (exp) = gimple_call_from_thunk_p (stmt);
-   CALL_CANNOT_INLINE_P (exp) = gimple_call_cannot_inline_p (stmt);
CALL_EXPR_VA_ARG_PACK (exp) = gimple_call_va_arg_pack_p (stmt);
SET_EXPR_LOCATION (exp, gimple_location (stmt));
TREE_BLOCK (exp) = gimple_block (stmt);
--- 2050,2055 
Index: gcc/gimple.c
===
*** gcc/gimple.c(revision 181902)
--- gcc/gimple.c(working copy)
*** gimple_build_call_from_tree (tree t)
*** 370,376 
/* Carry all the CALL_EXPR flags to the new GIMPLE_CALL.  */
gimple_call_set_chain (call, CALL_EXPR_STATIC_CHAIN (t));
gimple_call_set_tail (call, CALL_EXPR_TAILCALL (t));
-   gimple_call_set_cannot_inline (call, CALL_CANNOT_INLINE_P (t));
gimple_call_set_return_slot_opt (call, CALL_EXPR_RETURN_SLOT_OPT (t));
if (fndecl
&& DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
--- 370,375 
Index: gcc/gimplify.c
===
*** gcc/gimplify.c  (revision 181902)
--- gcc/gimplify.c  (working copy)
*** gimplify_call_expr (tree *expr_p, gimple
*** 2449,2455 
  CALL_EXPR_RETURN_SLOT_OPT (*expr_p)
= CALL_EXPR_RETURN_SLOT_OPT (call);
  CALL_FROM_THUNK_P (*expr_p) =

Re: [Patch, i386] Limit unroll factor for certain loops on Corei7

2011-12-02 Thread Teresa Johnson
Thanks, Andreas. You are right in that fully peeling a loop is done by
a different code path (peel_loops_completely() and earlier in the tree
unroller).

Teresa

On Fri, Dec 2, 2011 at 12:54 AM, Andreas Krebbel
 wrote:
> On Thu, Dec 01, 2011 at 11:39:36PM -0800, Teresa Johnson wrote:
>> To do this I leveraged the existing TARGET_LOOP_UNROLL_ADJUST target
>> hook, which was previously only defined for s390. I added one
>> additional call to this target hook, when unrolling for constant trip
>> count loops. Previously it was only called for runtime computed trip
>> counts. Andreas, can you comment on the effect for s390 of this
>> additional call of the target hook, since I can't measure that?
>
> Limiting the unrolling of loops with constant iterations also makes
> sense for s390.  However, the limitations are only relevant if it
> actually stays a loop. If the loop gets completely peeled into a
> sequential instruction stream there should be no limitation. But as I
> understand it this will be done by different code paths.
>
> So I think the change should be ok for s390 as well. It will take some
> time to get measurements on that. I'll try to keep that in mind until
> then.
>
> Bye,
>
> -Andreas-
>



-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


Re: Adjust omp-low test for alignment

2011-12-02 Thread Mikael Pettersson
Hans-Peter Nilsson writes:
 >   BTW, on the topic, I cringe whenever I
 > see futexes expressed as plain "int", they absolutely have to
 > have at least natural alignment which is not always true e.g. in
 > structs.  People, please keep the atomic types
 > target-overridable in libraries.

+1 for m68k-linux, where plain "int" only has 16-bit alignment
(by SW convention, Linux-capable HW tolerates 8-bit alignment),
but futexes must be 32-bit aligned (or at least not cross page
boundaries).


Re: RTEMS Specific Ada Patch

2011-12-02 Thread Joel Sherrill

On 12/02/2011 01:48 AM, Arnaud Charlet wrote:

>> The attached patch is necessary to let the gcc head
>> compile Ada for *-*-rtems*.  Other than terminals.c,
>> the files impacted are RTEMS specific.  OK to commit?
>
> OK

Thanks.  Committed.

--
Joel Sherrill, Ph.D.             Director of Research & Development
joel.sherr...@oarcorp.com        On-Line Applications Research
Ask me about RTEMS: a free RTOS  Huntsville AL 35805
   Support Available             (256) 722-9985




[Ada] Implement concrete iterators as a type hierarchy for multiway trees

2011-12-02 Thread Arnaud Charlet
The iterators for the multiway trees are now implemented as a type
hierarchy. Iterating over a tree is the same as iterating over a subtree
starting from the root, and so the tree iterator forwards the request to the
subtree iterator.

Tested on x86_64-pc-linux-gnu, committed on trunk

2011-12-02  Matthew Heaney  

* a-cbmutr.ads (No_Node): Moved declaration from body to spec.
* a-comutr.adb, a-cimutr.adb, a-cbmutr.adb (Iterator): Derives
from Root_Iterator.
(Child_Iterator): Derives from Root_Iterator.
(Finalize): Implemented as an override operation for Root_Iterator.
(First): Return value depends on Subtree component.
(Last): Component was renamed from Parent to Subtree.
(Next): Checks parameter value, and uses simplified loop.
(Iterate): Forwards to Iterate_Subtree.
(Iterate_Children): Component was renamed from Parent to Subtree.
(Iterate_Subtree): Checks parameter value.

Index: a-cimutr.adb
===
--- a-cimutr.adb(revision 181914)
+++ a-cimutr.adb(working copy)
@@ -33,41 +33,50 @@
 
 package body Ada.Containers.Indefinite_Multiway_Trees is
 
-   type Iterator is new Limited_Controlled and
+   
+   --  Root_Iterator --
+   
+
+   type Root_Iterator is abstract new Limited_Controlled and
  Tree_Iterator_Interfaces.Forward_Iterator with
record
   Container : Tree_Access;
-  Position  : Cursor;
-  From_Root : Boolean;
+  Subtree   : Tree_Node_Access;
end record;
 
-   type Child_Iterator is new Limited_Controlled and
- Tree_Iterator_Interfaces.Reversible_Iterator with
-   record
-  Container : Tree_Access;
-  Parent: Tree_Node_Access;
-   end record;
+   overriding procedure Finalize (Object : in out Root_Iterator);
 
-   overriding procedure Finalize (Object : in out Iterator);
+   ---
+   --  Subtree_Iterator --
+   ---
 
-   overriding function First (Object : Iterator) return Cursor;
+   type Subtree_Iterator is new Root_Iterator with null record;
+
+   overriding function First (Object : Subtree_Iterator) return Cursor;
+
overriding function Next
- (Object   : Iterator;
+ (Object   : Subtree_Iterator;
   Position : Cursor) return Cursor;
 
-   overriding procedure Finalize (Object : in out Child_Iterator);
+   -
+   --  Child_Iterator --
+   -
 
+   type Child_Iterator is new Root_Iterator and
+ Tree_Iterator_Interfaces.Reversible_Iterator with null record;
+
overriding function First (Object : Child_Iterator) return Cursor;
+
overriding function Next
  (Object   : Child_Iterator;
   Position : Cursor) return Cursor;
 
+   overriding function Last (Object : Child_Iterator) return Cursor;
+
overriding function Previous
  (Object   : Child_Iterator;
   Position : Cursor) return Cursor;
 
-   overriding function Last (Object : Child_Iterator) return Cursor;
-
---
-- Local Subprograms --
---
@@ -936,18 +945,12 @@
-- Finalize --
--
 
-   procedure Finalize (Object : in out Iterator) is
+   procedure Finalize (Object : in out Root_Iterator) is
   B : Natural renames Object.Container.Busy;
begin
   B := B - 1;
end Finalize;
 
-   procedure Finalize (Object : in out Child_Iterator) is
-  B : Natural renames Object.Container.Busy;
-   begin
-  B := B - 1;
-   end Finalize;
-
--
-- Find --
--
@@ -971,14 +974,18 @@
-- First --
---
 
-   function First (Object : Iterator) return Cursor is
+   overriding function First (Object : Subtree_Iterator) return Cursor is
begin
-  return Object.Position;
+  if Object.Subtree = Root_Node (Object.Container.all) then
+ return First_Child (Root (Object.Container.all));
+  else
+ return Cursor'(Object.Container, Object.Subtree);
+  end if;
end First;
 
-   function First (Object : Child_Iterator) return Cursor is
+   overriding function First (Object : Child_Iterator) return Cursor is
begin
-  return First_Child (Cursor'(Object.Container, Object.Parent));
+  return First_Child (Cursor'(Object.Container, Object.Subtree));
end First;
 
-
@@ -1348,18 +1355,8 @@
function Iterate (Container : Tree)
  return Tree_Iterator_Interfaces.Forward_Iterator'Class
is
-  B  : Natural renames Container'Unrestricted_Access.all.Busy;
-  RC : constant Cursor :=
- (Container'Unrestricted_Access, Root_Node (Container));
begin
-  return It : constant Iterator :=
-Iterator'(Limited_Controlled with
-Container => Container'Unrestricted_Access,
-Position  => First_Child (RC),
-   

[Ada] Generation of external and fully qualified names

2011-12-02 Thread Arnaud Charlet
This patch corrects a buffer issue which may lead to bogus expanded names at
link time. The problem is initiated while creating the external name of a
tagged type whose scope is an overloaded name. This places useless data in the
Homonym_Numbers buffer which is then reused when building the qualified name
of an arbitrary entity.

Tested on x86_64-pc-linux-gnu, committed on trunk

2011-12-02  Hristian Kirtchev  

* exp_dbug.adb: Comment reformatting.
(Get_External_Name): Use Reset_Buffers to reset the contents of
Name_Buffer and Homonym_Numbers.
(Qualify_All_Entity_Names): Reset the contents of Name_Buffer and
Homonym_Numbers before creating a new qualified name for a particular
entity.
(Reset_Buffers): New routine.

Index: exp_dbug.adb
===
--- exp_dbug.adb(revision 181910)
+++ exp_dbug.adb(working copy)
@@ -6,7 +6,7 @@
 --  --
 -- B o d y  --
 --  --
---  Copyright (C) 1996-2010, Free Software Foundation, Inc. --
+--  Copyright (C) 1996-2011, Free Software Foundation, Inc. --
 --  --
 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
 -- terms of the  GNU General Public License as published  by the Free Soft- --
@@ -105,11 +105,11 @@
-- Homonym_Suffix --

 
-   --  The string defined here (and its associated length) is used to
-   --  gather the homonym string that will be appended to Name_Buffer
-   --  when the name is complete. Strip_Suffixes appends to this string
-   --  as does Append_Homonym_Number, and Output_Homonym_Numbers_Suffix
-   --  appends the string to the end of Name_Buffer.
+   --  The string defined here (and its associated length) is used to gather
+   --  the homonym string that will be appended to Name_Buffer when the name
+   --  is complete. Strip_Suffixes appends to this string as does
+   --  Append_Homonym_Number, and Output_Homonym_Numbers_Suffix appends the
+   --  string to the end of Name_Buffer.
 
Homonym_Numbers : String (1 .. 256);
Homonym_Len : Natural := 0;
@@ -147,6 +147,10 @@
--  If not already done, replaces the Chars field of the given entity
--  with the appropriate fully qualified name.
 
+   procedure Reset_Buffers;
+   --  Reset the contents of Name_Buffer and Homonym_Numbers by setting their
+   --  respective lengths to zero.
+
procedure Strip_Suffixes (BNPE_Suffix_Found : in out Boolean);
--  Given an qualified entity name in Name_Buffer, remove any plain X or
--  X{nb} qualification suffix. The contents of Name_Buffer is not changed
@@ -701,8 +705,7 @@
--  Start of processing for Get_External_Name
 
begin
-  Name_Len:= 0;
-  Homonym_Len := 0;
+  Reset_Buffers;
 
   --  If this is a child unit, we want the child
 
@@ -1022,6 +1025,7 @@
begin
   for J in Name_Qualify_Units.First .. Name_Qualify_Units.Last loop
  E := Defining_Entity (Name_Qualify_Units.Table (J));
+ Reset_Buffers;
  Qualify_Entity_Name (E);
 
  --  Normally entities in the qualification list are scopes, but in the
@@ -1033,6 +1037,7 @@
  if Ekind (E) /= E_Variable then
 Ent := First_Entity (E);
 while Present (Ent) loop
+   Reset_Buffers;
Qualify_Entity_Name (Ent);
Next_Entity (Ent);
 
@@ -1101,10 +1106,10 @@
  if No (E) then
 return;
 
- --  If this we are qualifying entities local to a generic
- --  instance, use the name of the original instantiation,
- --  not that of the anonymous subprogram in the wrapper
- --  package, so that gdb doesn't have to know about these.
+ --  If this we are qualifying entities local to a generic instance,
+ --  use the name of the original instantiation, not that of the
+ --  anonymous subprogram in the wrapper package, so that gdb doesn't
+ --  have to know about these.
 
  elsif Is_Generic_Instance (E)
and then Is_Subprogram (E)
@@ -1394,6 +1399,16 @@
   Name_Qualify_Units.Append (N);
end Qualify_Entity_Names;
 
+   ---
+   -- Reset_Buffers --
+   ---
+
+   procedure Reset_Buffers is
+   begin
+  Name_Len:= 0;
+  Homonym_Len := 0;
+   end Reset_Buffers;
+

-- Strip_Suffixes --



[Ada] Minor code reorganization

2011-12-02 Thread Arnaud Charlet
This patch does not modify the functionality of the compiler. It
moves semantic routines from Sem_Util to Sem_Aux to have them
available in ASIS.

Tested on x86_64-pc-linux-gnu, committed on trunk

2011-12-02  Javier Miranda  

* sem_util.ads, sem_util.adb, sem_aux.ads, sem_aux.adb
(Effectively_Has_Constrained_Partial_View): Moved to sem_aux.
(In_Generic_Body): Moved to sem_aux.
(Unit_Declaration_Node): Moved to sem_aux.
* einfo.ads (Effectively_Has_Constrained_Partial_View): Complete
documentation.
* exp_attr.adb, live.adb, sem_ch10.adb, checks.adb, sem.adb,
rtsfind.adb, sem_attr.adb, sem_elab.adb, exp_ch4.adb, sem_ch4.adb,
exp_ch13.adb: Add with-clause on Sem_Aux.

Index: sem_aux.adb
===
--- sem_aux.adb (revision 181910)
+++ sem_aux.adb (working copy)
@@ -152,6 +152,25 @@
   end if;
end Constant_Value;
 
+   --
+   -- Effectively_Has_Constrained_Partial_View --
+   --
+
+   function Effectively_Has_Constrained_Partial_View
+ (Typ  : Entity_Id;
+  Scop : Entity_Id) return Boolean
+   is
+   begin
+  return Has_Constrained_Partial_View (Typ)
+or else (In_Generic_Body (Scop)
+   and then Is_Generic_Type (Base_Type (Typ))
+   and then Is_Private_Type (Base_Type (Typ))
+   and then not Is_Tagged_Type (Typ)
+   and then not (Is_Array_Type (Typ)
+   and then not Is_Constrained (Typ))
+   and then Has_Discriminants (Typ));
+   end Effectively_Has_Constrained_Partial_View;
+
-
-- Enclosing_Dynamic_Scope --
-
@@ -419,6 +438,43 @@
end Initialize;
 
-
+   -- In_Generic_Body --
+   -
+
+   function In_Generic_Body (Id : Entity_Id) return Boolean is
+  S : Entity_Id;
+
+   begin
+  --  Climb scopes looking for generic body
+
+  S := Id;
+  while Present (S) and then S /= Standard_Standard loop
+
+ --  Generic package body
+
+ if Ekind (S) = E_Generic_Package
+   and then In_Package_Body (S)
+ then
+return True;
+
+ --  Generic subprogram body
+
+ elsif Is_Subprogram (S)
+   and then Nkind (Unit_Declaration_Node (S))
+  = N_Generic_Subprogram_Declaration
+ then
+return True;
+ end if;
+
+ S := Scope (S);
+  end loop;
+
+  --  False if top of scope stack without finding a generic body
+
+  return False;
+   end In_Generic_Body;
+
+   -
-- Is_By_Copy_Type --
-
 
@@ -904,4 +960,53 @@
   return E;
end Ultimate_Alias;
 
+   --
+   -- Unit_Declaration_Node --
+   --
+
+   function Unit_Declaration_Node (Unit_Id : Entity_Id) return Node_Id is
+  N : Node_Id := Parent (Unit_Id);
+
+   begin
+  --  Predefined operators do not have a full function declaration
+
+  if Ekind (Unit_Id) = E_Operator then
+ return N;
+  end if;
+
+  --  Isn't there some better way to express the following ???
+
+  while Nkind (N) /= N_Abstract_Subprogram_Declaration
+and then Nkind (N) /= N_Formal_Package_Declaration
+and then Nkind (N) /= N_Function_Instantiation
+and then Nkind (N) /= N_Generic_Package_Declaration
+and then Nkind (N) /= N_Generic_Subprogram_Declaration
+and then Nkind (N) /= N_Package_Declaration
+and then Nkind (N) /= N_Package_Body
+and then Nkind (N) /= N_Package_Instantiation
+and then Nkind (N) /= N_Package_Renaming_Declaration
+and then Nkind (N) /= N_Procedure_Instantiation
+and then Nkind (N) /= N_Protected_Body
+and then Nkind (N) /= N_Subprogram_Declaration
+and then Nkind (N) /= N_Subprogram_Body
+and then Nkind (N) /= N_Subprogram_Body_Stub
+and then Nkind (N) /= N_Subprogram_Renaming_Declaration
+and then Nkind (N) /= N_Task_Body
+and then Nkind (N) /= N_Task_Type_Declaration
+and then Nkind (N) not in N_Formal_Subprogram_Declaration
+and then Nkind (N) not in N_Generic_Renaming_Declaration
+  loop
+ N := Parent (N);
+
+ --  We don't use Assert here, because that causes an infinite loop
+ --  when assertions are turned off. Better to crash.
+
+ if No (N) then
+raise Program_Error;
+ end if;
+  end loop;
+
+  return N;
+   end Unit_Declaration_Node;
+
 end Sem_Aux;
Index: sem_aux.ads
===
--- sem_aux.ads (revision 181910)
+++ sem_aux.ads (working copy)
@@ -104,6 +104,14 @@
--  constants from the po

[Ada] Remove spurious warning in Alfa mode

2011-12-02 Thread Arnaud Charlet
The side effect removal machinery may generate illegal Ada code to avoid the
usage of access types and 'reference in Alfa mode. Since this is legal code
with respect to theorem proving, do not emit the error.

Tested on x86_64-pc-linux-gnu, committed on trunk

2011-12-02  Yannick Moy  

* sem_ch3.adb (Check_Initialization): Do not emit warning on
initialization of limited type object in Alfa mode.

Index: sem_ch3.adb
===
--- sem_ch3.adb (revision 181914)
+++ sem_ch3.adb (working copy)
@@ -9704,10 +9704,26 @@
  ("?cannot initialize entities of limited type!", Exp);
 
 elsif Ada_Version < Ada_2005 then
-   Error_Msg_N
- ("cannot initialize entities of limited type", Exp);
-   Explain_Limited_Type (T, Exp);
 
+   --  The side effect removal machinery may generate illegal Ada
+   --  code to avoid the usage of access types and 'reference in
+   --  Alfa mode. Since this is legal code with respect to theorem
+   --  proving, do not emit the error.
+
+   if Alfa_Mode
+ and then Nkind (Exp) = N_Function_Call
+ and then Nkind (Parent (Exp)) = N_Object_Declaration
+ and then not Comes_From_Source
+(Defining_Identifier (Parent (Exp)))
+   then
+  null;
+
+   else
+  Error_Msg_N
+("cannot initialize entities of limited type", Exp);
+  Explain_Limited_Type (T, Exp);
+   end if;
+
 else
--  Specialize error message according to kind of illegal
--  initial expression.


[Ada] Check preconditions for child iterator of multiway tree container

2011-12-02 Thread Arnaud Charlet
The iterator for visiting children of a node in a multiway tree must check the
value of the Parent parameter to ensure that it is non-null, and that it
actually designates a node in the tree.

There were also several instances where cursor values returned by iterator
operations were not well-formed. That has been corrected by forwarding the
iterator operation to the corresponding cursor-based operation.

Tested on x86_64-pc-linux-gnu, committed on trunk

2011-12-02  Matthew Heaney  

* a-comutr.adb, a-cimutr.adb, a-cbmutr.adb (Iterator): Rename
Position component.
(Finalize): Remove unnecessary access check.
(First): Forward to First_Child.
(Last): Forward to Last_Child.
(Iterate): Check preconditions for parent node parameter.
(Next): Forward to Next_Sibling.
(Previous): Forward to Previous_Sibling.

Index: a-cimutr.adb
===
--- a-cimutr.adb(revision 181912)
+++ a-cimutr.adb(working copy)
@@ -45,7 +45,7 @@
  Tree_Iterator_Interfaces.Reversible_Iterator with
record
   Container : Tree_Access;
-  Position  : Cursor;
+  Parent: Tree_Node_Access;
end record;
 
overriding procedure Finalize (Object : in out Iterator);
@@ -937,25 +937,15 @@
--
 
procedure Finalize (Object : in out Iterator) is
+  B : Natural renames Object.Container.Busy;
begin
-  if Object.Container /= null then
- declare
-B : Natural renames Object.Container.all.Busy;
- begin
-B := B - 1;
- end;
-  end if;
+  B := B - 1;
end Finalize;
 
procedure Finalize (Object : in out Child_Iterator) is
+  B : Natural renames Object.Container.Busy;
begin
-  if Object.Container /= null then
- declare
-B : Natural renames Object.Container.all.Busy;
- begin
-B := B - 1;
- end;
-  end if;
+  B := B - 1;
end Finalize;
 
--
@@ -988,7 +978,7 @@
 
function First (Object : Child_Iterator) return Cursor is
begin
-  return (Object.Container, Object.Position.Node.Children.First);
+  return First_Child (Cursor'(Object.Container, Object.Parent));
end First;
 
-
@@ -1433,13 +1423,22 @@
   Parent: Cursor)
  return Tree_Iterator_Interfaces.Reversible_Iterator'Class
is
-  B : Natural renames Container'Unrestricted_Access.all.Busy;
+  C : constant Tree_Access := Container'Unrestricted_Access;
+  B : Natural renames C.Busy;
 
begin
+  if Parent = No_Element then
+ raise Constraint_Error with "Parent cursor has no element";
+  end if;
+
+  if Parent.Container /= C then
+ raise Program_Error with "Parent cursor not in container";
+  end if;
+
   return It : constant Child_Iterator :=
 Child_Iterator'(Limited_Controlled with
-  Container => Parent.Container,
-  Position  => Parent)
+  Container => C,
+  Parent=> Parent.Node)
   do
  B := B + 1;
   end return;
@@ -1516,7 +1515,7 @@
 
overriding function Last (Object : Child_Iterator) return Cursor is
begin
-  return (Object.Container, Object.Position.Node.Children.Last);
+  return Last_Child (Cursor'(Object.Container, Object.Parent));
end Last;
 

@@ -1646,18 +1645,20 @@
end Next;
 
function Next
- (Object : Child_Iterator;
+ (Object   : Child_Iterator;
   Position : Cursor) return Cursor
is
-  C : constant Tree_Node_Access := Position.Node.Next;
-
begin
-  if C = null then
+  if Position.Container = null then
  return No_Element;
+  end if;
 
-  else
- return (Object.Container, C);
+  if Position.Container /= Object.Container then
+ raise Program_Error with
+   "Position cursor of Next designates wrong tree";
   end if;
+
+  return Next_Sibling (Position);
end Next;
 
--
@@ -1787,18 +1788,20 @@
--
 
overriding function Previous
- (Object : Child_Iterator;
+ (Object   : Child_Iterator;
   Position : Cursor) return Cursor
is
-  C : constant Tree_Node_Access := Position.Node.Prev;
-
begin
-  if C = null then
+  if Position.Container = null then
  return No_Element;
+  end if;
 
-  else
- return (Object.Container, C);
+  if Position.Container /= Object.Container then
+ raise Program_Error with
+   "Position cursor of Previous designates wrong tree";
   end if;
+
+  return Previous_Sibling (Position);
end Previous;
 
--
Index: a-comutr.adb
===
--- a-com

[Ada] Ada 2012: Derived types and partial views

2011-12-02 Thread Arnaud Charlet
This patch incorporates the support for AI95-0041. For the purposes of the
rules for allowing allocated unconstrained objects, any ancestor that has
a constrained partial view causes the rules to apply.

In addition, in a generic body, 3.10.2(27.2/2) is checked assuming that any
untagged formal private or derived type has a constrained partial view.

The following test now compiles with an error:

procedure AI95_041  is
   subtype Index is Integer range 0 .. 255;
   Smaller_Index : constant Index := 10;
   Larger_Index  : constant Index := 20;

   generic
  type T1 (D : Index) is private;
   package G is
  type Ref is access all T1;
  Smaller : aliased T1 (Smaller_Index);
  Ptr_1   : Ref := Smaller'Access; -- Legal? (Yes.)
  Ptr : Ref;
end G;

package body G is
begin
   Ptr := Smaller'Access; -- Legal? (No.)
end G;

begin
   null;
end;

Command: gcc -c -gnat05 ai95_041.adb
Output:
ai95_041.adb:17:15: object subtype must statically match designated subtype

Tested on x86_64-pc-linux-gnu, committed on trunk

2011-12-02  Javier Miranda  

* sem_ch3.adb (Constrain_Access): Enable on Ada 2005 mode the
static check of the rule of general access types whose designated
type has discriminants.
* sem_util.ads, sem_util.adb
(Effectively_Has_Constrained_Partial_View): New subprogram.
(In_Generic_Body): New subprogram.
* einfo.ads (Has_Constrained_Partial_View): Adding documentation.
* sem_prag.adb (Inside_Generic_Body): Removed. Replaced by new
subprogram In_Generic_Body.
* exp_attr.adb, checks.adb, sem_attr.adb, exp_ch4.adb,
sem_ch4.adb: In addition, this patch replaces the occurrences of
Has_Constrained_Partial_View by
Effectively_Has_Constrained_Partial_View.

Index: sem_ch3.adb
===
--- sem_ch3.adb (revision 181910)
+++ sem_ch3.adb (working copy)
@@ -10674,8 +10674,7 @@
 return;
  end if;
 
- if (Ekind (T) = E_General_Access_Type
-  or else Ada_Version >= Ada_2005)
+ if Ekind (T) = E_General_Access_Type
and then Has_Private_Declaration (Desig_Type)
and then In_Open_Scopes (Scope (Desig_Type))
and then Has_Discriminants (Desig_Type)
@@ -10687,11 +10686,6 @@
 --  (Defect Report 8652/0008, Technical Corrigendum 1, checked
 --  by ACATS B371001).
 
---  Rule updated for Ada 2005: the private type is said to have
---  a constrained partial view, given that objects of the type
---  can be declared. Furthermore, the rule applies to all access
---  types, unlike the rule concerning default discriminants.
-
 declare
Pack  : constant Node_Id :=
  Unit_Declaration_Node (Scope (Desig_Type));
Index: exp_attr.adb
===
--- exp_attr.adb(revision 181910)
+++ exp_attr.adb(working copy)
@@ -1559,10 +1559,11 @@
return Is_Aliased_View (Obj)
 and then
   (Is_Constrained (Etype (Obj))
- or else (Nkind (Obj) = N_Explicit_Dereference
-and then
-  not Has_Constrained_Partial_View
-(Base_Type (Etype (Obj);
+ or else
+   (Nkind (Obj) = N_Explicit_Dereference
+  and then
+not Effectively_Has_Constrained_Partial_View
+  (Base_Type (Etype (Obj);
 end if;
  end Is_Constrained_Aliased_View;
 
@@ -1684,7 +1685,8 @@
 or else
  (Nkind (Pref) = N_Explicit_Dereference
and then
- not Has_Constrained_Partial_View (Base_Type (Ptyp)))
+ not Effectively_Has_Constrained_Partial_View
+   (Base_Type (Ptyp)))
 or else Is_Constrained (Underlying_Type (Ptyp))
 or else (Ada_Version >= Ada_2012
   and then Is_Tagged_Type (Underlying_Type (Ptyp))
Index: einfo.ads
===
--- einfo.ads   (revision 181910)
+++ einfo.ads   (working copy)
@@ -1420,6 +1420,8 @@
 --   type has no discriminants and the full view has discriminants with
 --   defaults. In Ada 2005 heap-allocated objects of such types are not
 --   constrained, and can change their discriminants with full assignment.
+--   Sem_Util.Effectively_Has_Constrained_Partial_View should be always
+--   used by callers, rather than reading this attribute directly.
 
 --

[Ada] Implement iterator for multiset containers

2011-12-02 Thread Arnaud Charlet
This change implements a reversible iterator for multiset containers.

An iterator can either be partial, visiting only some of the items in the
container (in which case the start position is specified), or complete,
visiting all of them.  The iterator caches the start position during its
construction, and that position value is used by First and Last to determine
their associated return values.

Tested on x86_64-pc-linux-gnu, committed on trunk

2011-12-02  Matthew Heaney  

* a-coormu.ads, a-ciormu.ads: Declare iterator factory function.
* a-coormu.adb, a-ciormu.adb (Iterator): Declare concrete
Iterator type.
(Finalize): Decrement busy counter.
(First, Last): Cursor return value depends on iterator node value.
(Iterate): Use start position as iterator node value.
(Next, Previous): Forward to corresponding cursor-based operation.

Index: a-ciormu.adb
===
--- a-ciormu.adb(revision 181910)
+++ a-ciormu.adb(working copy)
@@ -42,6 +42,26 @@
 
 package body Ada.Containers.Indefinite_Ordered_Multisets is
 
+   type Iterator is new Limited_Controlled and
+ Set_Iterator_Interfaces.Reversible_Iterator with
+   record
+  Container : Set_Access;
+  Node  : Node_Access;
+   end record;
+
+   overriding procedure Finalize (Object : in out Iterator);
+
+   overriding function First (Object : Iterator) return Cursor;
+   overriding function Last  (Object : Iterator) return Cursor;
+
+   overriding function Next
+ (Object   : Iterator;
+  Position : Cursor) return Cursor;
+
+   overriding function Previous
+ (Object   : Iterator;
+  Position : Cursor) return Cursor;
+
-
-- Node Access Subprograms --
-
@@ -592,6 +612,17 @@
   return Cursor'(Container'Unrestricted_Access, Node);
end Find;
 
+   --
+   -- Finalize --
+   --
+
+   procedure Finalize (Object : in out Iterator) is
+  B : Natural renames Object.Container.Tree.Busy;
+  pragma Assert (B > 0);
+   begin
+  B := B - 1;
+   end Finalize;
+
---
-- First --
---
@@ -605,6 +636,28 @@
   return Cursor'(Container'Unrestricted_Access, Container.Tree.First);
end First;
 
+   function First (Object : Iterator) return Cursor is
+   begin
+  --  The value of the iterator object's Node component influences the
+  --  behavior of the First (and Last) selector function.
+
+  --  When the Node component is null, this means the iterator object was
+  --  constructed without a start expression, in which case the (forward)
+  --  iteration starts from the (logical) beginning of the entire sequence
+  --  of items (corresponding to Container.First, for a forward iterator).
+
+  --  Otherwise, this is iteration over a partial sequence of items. When
+  --  the Node component is non-null, the iterator object was constructed
+  --  with a start expression, that specifies the position from which the
+  --  (forward) partial iteration begins.
+
+  if Object.Node = null then
+ return Object.Container.First;
+  else
+ return Cursor'(Object.Container, Object.Node);
+  end if;
+   end First;
+
---
-- First_Element --
---
@@ -1347,6 +1400,75 @@
   B := B - 1;
end Iterate;
 
+   function Iterate (Container : Set)
+ return Set_Iterator_Interfaces.Reversible_Iterator'Class
+   is
+  S : constant Set_Access := Container'Unrestricted_Access;
+  B : Natural renames S.Tree.Busy;
+
+   begin
+  --  The value of the Node component influences the behavior of the First
+  --  and Last selector functions of the iterator object. When the Node
+  --  component is null (as is the case here), this means the iterator
+  --  object was constructed without a start expression. This is a complete
+  --  iterator, meaning that the iteration starts from the (logical)
+  --  beginning of the sequence of items.
+
+  --  Note: For a forward iterator, Container.First is the beginning, and
+  --  for a reverse iterator, Container.Last is the beginning.
+
+  return It : constant Iterator := (Limited_Controlled with S, null) do
+ B := B + 1;
+  end return;
+   end Iterate;
+
+   function Iterate (Container : Set; Start : Cursor)
+ return Set_Iterator_Interfaces.Reversible_Iterator'Class
+   is
+  S : constant Set_Access := Container'Unrestricted_Access;
+  B : Natural renames S.Tree.Busy;
+
+   begin
+  --  It was formerly the case that when Start = No_Element, the partial
+  --  iterator was defined to behave the same as for a complete iterator,
+  --  and iterate over the entire sequence of items. However, those
+  --  semantics were unintuitive and arguably error-prone (it is too easy
+  --  to accident

Re: [Patch] Fix Bug 51162

2011-12-02 Thread Jason Merrill

OK.

Jason


Re: [PATCH] Fix varpool handling of register variables (PR47259)

2011-12-02 Thread Jan Hubicka
Am Fri 02 Dec 2011 01:38:46 PM CET schrieb Richard Guenther  
:




This fixes a recent regression on the testcase of PR47259 where
we bring local a global register variable during WPA time and
then mangle its register name spec during ltrans.  Oops.


indeed... (I am not in general sure how LTO should deal with global
register vars: i.e. when a function comes from a unit not declaring it, it
should not limit RA)



It doesn't make sense to bring "local" global register variables.

Fixed by making varpool_externally_visible_p return true
for all DECL_HARD_REGISTER decls.


This seems fine to me: after all global register is always visible  
from the other units.


Honza, does this look like the correct fix?  Or should we represent
global register vars differently in the varpool?  Any idea what
caused this to regress?


No idea actually. Originally the strategy was to not make register
variables hit the varpool (since they are registers, not vars in the
traditional sense), but I believe that was changed a long time ago
(because for ipa-reference and friends it is a variable).


Honza


So far tested on the testcase only.

Thanks,
Richard.

2011-12-02  Richard Guenther  

PR lto/47259
* ipa.c (varpool_externally_visible_p): Register variables
are always externally visible.

Index: gcc/ipa.c
===
--- gcc/ipa.c   (revision 181902)
+++ gcc/ipa.c   (working copy)
@@ -662,6 +662,8 @@ varpool_externally_visible_p (struct var
   if (varpool_used_from_object_file_p (vnode))
 return true;

+  if (DECL_HARD_REGISTER (vnode->decl))
+return true;
   if (DECL_PRESERVE_P (vnode->decl))
 return true;
   if (lookup_attribute ("externally_visible",






Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread William J. Schmidt


On Fri, 2011-12-02 at 14:59 +0100, Michael Matz wrote:
> Hi,
> 
> On Fri, 2 Dec 2011, William J. Schmidt wrote:
> 
> > > > -  tree def = gimple_get_lhs (stmt);
> > > > +  /* If this is a PHI, we only want to consider it if all of its
> > > > + arguments are SSA names (which are known to be defined in a
> > > > + single place).  This avoids errors when dealing with if-temps,
> > > > + for example.  */
> > > > +  if (gimple_code (stmt) == GIMPLE_PHI)
> > > > +for (i = 0; i < gimple_phi_num_args (stmt); i++)
> > > > +  if (TREE_CODE (gimple_phi_arg_def (stmt, i)) != SSA_NAME)
> > > > +   return;
> > > 
> > > Can you elaborate on this?  Why are for example constants not ok
> > > (which are the only things besides SSA names that should occur
> > > here)?
> > 
> > I ran into a bootstrap problem in gengtype.c without this that took me a
> > while to track down.  Control flow was like this:
> > 
> > 10
> >/ |
> >   11 |
> >\ |
> > 12
> >/ |
> >   13 |
> >\ |
> > 14
> >
> > Blocks 12 and 14 contained iftmp PHI statements of constants that looked
> > identical, but the constants were "defined" in different blocks.  Blocks
> > 11 and 13 were empty.
> > 
> > In block 12:
> > 
> > iftmp.132_1 = PHI<", "(10), ""(11)>;
> > 
> > In block 14:
> > 
> > iftmp.133_7 = PHI<", "(12), ""(13)>;
> 
> You never can regard same-looking PHI nodes from different blocks as 
> equivalent.  Checking for non-SSA-names is not sufficient, the arguments 
> need to have the same control dependence.  Replace the above constants 
> with SSA names to see it breaking too (assume x_2 and x_3 are defined at 
> function start for instance):
> 
> bb12
>    iftmp.132_1 = PHI<x_2(10), x_3(11)>;
> 
> bb14:
>    iftmp.133_7 = PHI<x_2(12), x_3(13)>;
> 
> Again, if the two conditions in bb10 and bb12 are different the phi 
> results will be different (x_2 vs x_3).  I'd punt and simply deal only 
> with PHI nodes in the current block, i.e. don't remember any PHI states 
> during the walking.
> 

Ah, of course, you're right.  I wasn't thinking about that properly.
I'll revisit this.

Thanks,
Bill

> 
> Ciao,
> Michael.
> 



Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread Michael Matz
Hi,

On Fri, 2 Dec 2011, William J. Schmidt wrote:

> > > -  tree def = gimple_get_lhs (stmt);
> > > +  /* If this is a PHI, we only want to consider it if all of its
> > > + arguments are SSA names (which are known to be defined in a
> > > + single place).  This avoids errors when dealing with if-temps,
> > > + for example.  */
> > > +  if (gimple_code (stmt) == GIMPLE_PHI)
> > > +for (i = 0; i < gimple_phi_num_args (stmt); i++)
> > > +  if (TREE_CODE (gimple_phi_arg_def (stmt, i)) != SSA_NAME)
> > > +   return;
> > 
> > Can you elaborate on this?  Why are for example constants not ok
> > (which are the only things besides SSA names that should occur
> > here)?
> 
> I ran into a bootstrap problem in gengtype.c without this that took me a
> while to track down.  Control flow was like this:
> 
> 10
>/ |
>   11 |
>\ |
> 12
>/ |
>   13 |
>\ |
> 14
>
> Blocks 12 and 14 contained iftmp PHI statements of constants that looked
> identical, but the constants were "defined" in different blocks.  Blocks
> 11 and 13 were empty.
> 
> In block 12:
> 
>   iftmp.132_1 = PHI<", "(10), ""(11)>;
> 
> In block 14:
> 
>   iftmp.133_7 = PHI<", "(12), ""(13)>;

You never can regard same-looking PHI nodes from different blocks as 
equivalent.  Checking for non-SSA-names is not sufficient, the arguments 
need to have the same control dependence.  Replace the above constants 
with SSA names to see it breaking too (assume x_2 and x_3 are defined at 
function start for instance):

bb12
   iftmp.132_1 = PHI<x_2(10), x_3(11)>;

bb14:
   iftmp.133_7 = PHI<x_2(12), x_3(13)>;

Again, if the two conditions in bb10 and bb12 are different the phi 
results will be different (x_2 vs x_3).  I'd punt and simply deal only 
with PHI nodes in the current block, i.e. don't remember any PHI states 
during the walking.


Ciao,
Michael.


Re: [RFC] Port libitm to powerpc

2011-12-02 Thread Iain Sandoe


On 1 Dec 2011, at 23:28, Iain Sandoe wrote:
now I'm slightly confused - do we need to preserve it across the
call or not?


erm.  not well phrased.

I am trying to get a grasp on what determines the set of registers  
that should be saved.


Initially, I was thinking that it was the "call-saved" set - which,  
in the Darwin ABI is silent about the FPSCR (consistent with  
Joseph's remark - although I note that the ABI doc, in most cases,  
states YES/NO for each register).


Now I'm wondering if the saved set needs to include most/all of the  
set that are saved for exceptions?


Notwithstanding the questions above, (which still stand) - attached is  
a first stab at the Darwin version.


David: I steered clear of using R2 - so it might have some bits useful  
for Aix too.


Richard: things that I did, intentionally, differently (and I'm not  
sure are correct).


1. I saved the CR
2. Once the vrs are saved, I update the VRsave mask to reflect that.

I used the config machinery - because the sjlj.S code ended up looking  
more different than similar.


two failures to track down  but maybe the answers to the questions  
above will produce a reason ;-)


cheers
Iain

==

Native configuration is powerpc-apple-darwin9

=== libitm tests ===

Schedule of variations:
unix/-m32
unix/-m64

Running target unix/-m32
Using /usr/local/dejagnu-1-4-4/share/dejagnu/baseboards/unix.exp as  
board description file for target.
Using /usr/local/dejagnu-1-4-4/share/dejagnu/config/unix.exp as  
generic interface file for target.
Using /GCC/gcc-live-trunk/libitm/testsuite/config/default.exp as tool- 
and-target-specific interface file.

Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c/c.exp ...
FAIL: libitm.c/cancel.c execution test
FAIL: libitm.c/simple-2.c execution test
Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c++/c++.exp ...
WARNING: libitm.c++/static_ctor.C compilation failed to produce  
executable


=== libitm Summary for unix/-m32 ===

# of expected passes21
# of unexpected failures2
# of expected failures  5
Running target unix/-m64
Using /usr/local/dejagnu-1-4-4/share/dejagnu/baseboards/unix.exp as  
board description file for target.
Using /usr/local/dejagnu-1-4-4/share/dejagnu/config/unix.exp as  
generic interface file for target.
Using /GCC/gcc-live-trunk/libitm/testsuite/config/default.exp as tool- 
and-target-specific interface file.

Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c/c.exp ...
FAIL: libitm.c/cancel.c execution test
FAIL: libitm.c/simple-2.c execution test
Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c++/c++.exp ...
WARNING: libitm.c++/static_ctor.C compilation failed to produce  
executable


=== libitm Summary for unix/-m64 ===

# of expected passes21
# of unexpected failures2
# of expected failures  5

=== libitm Summary ===

# of expected passes42
# of unexpected failures4
# of expected failures  10

==--===
Index: libitm/config/darwin/powerpc/sjlj.S
===
--- libitm/config/darwin/powerpc/sjlj.S (revision 0)
+++ libitm/config/darwin/powerpc/sjlj.S (revision 0)
@@ -0,0 +1,335 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Iain Sandoe .
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+
+#if defined(__ppc64__)
+#define MODE_CHOICE(x, y) y
+#else
+#define MODE_CHOICE(x, y) x
+#endif
+
+#define MACHINE MODE_CHOICE(ppc7400,ppc64)
+#define g_long  MODE_CHOICE(long, quad) /* usage is ".g_long" */
+#define GPR_BYTES   MODE_CHOICE(4,8)/* size of a GPR in bytes */
+#define LOG2_GPR_BYTES  MODE_CHOICE(2,3)/* log2(GPR_BYTES) */
+
+#define cmpgMODE_CHOICE(cmpw, cmpd)
+#define lg  MODE_CHOICE(lwz, ld)
+#define stg MODE_CHOICE(stw, std)
+#define lgx MODE_CHOICE(lwzx, ldx)
+#define stgxMODE_CHOICE(stwx, stdx)
+#d

Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread William J. Schmidt


On Fri, 2011-12-02 at 10:24 +0100, Richard Guenther wrote:
> On Thu, Dec 1, 2011 at 11:13 PM, William J. Schmidt
>  wrote:
> > Greetings,
> >
> > Bug 39976 reported a degradation to 200.sixtrack wherein a hot
> > single-block loop is broken into two blocks.  Investigation showed the
> > cause to be a redundant PHI statement in the block, which the
> > tree-outof-ssa logic doesn't handle well.  Currently we don't have code
> > following the introduction of the redundant PHI that can clean it up.
> >
> > This patch modifies the dom pass to include redundant PHIs in the logic
> > that removes redundant computations.  With the patch applied, the extra
> > block is no longer created and the 200.sixtrack degradation is removed.
> > This improves its performance by 7.3% on PowerPC64 32-bit and by 5.0% on
> > PowerPC64 64-bit.
> >
> > Bootstrapped and regtested on powerpc64-linux.  OK for trunk?
> >
> > Thanks,
> > Bill
> >
> >
> > 2011-11-29  Bill Schmidt  
> >
> >PR middle-end/39976
> >* tree-ssa-dom.c (enum expr_kind): Add EXPR_PHI.
> >(struct hashable_expr): Add struct phi field.
> >(initialize_hash_element): Handle phis.
> >(hashable_expr_equal_p): Likewise.
> >(iterative_hash_hashable_expr): Likewise.
> >(print_expr_hash_elt): Likewise.
> >(dom_opt_enter_block): Create equivalences from redundant phis.
> >(eliminate_redundant_computations): Handle redundant phis.
> >
> >
> > Index: gcc/tree-ssa-dom.c
> > ===
> > --- gcc/tree-ssa-dom.c  (revision 181501)
> > +++ gcc/tree-ssa-dom.c  (working copy)
> > @@ -52,7 +52,8 @@ enum expr_kind
> >   EXPR_UNARY,
> >   EXPR_BINARY,
> >   EXPR_TERNARY,
> > -  EXPR_CALL
> > +  EXPR_CALL,
> > +  EXPR_PHI
> >  };
> >
> >  struct hashable_expr
> > @@ -65,6 +66,7 @@ struct hashable_expr
> > struct { enum tree_code op;  tree opnd0, opnd1; } binary;
> > struct { enum tree_code op;  tree opnd0, opnd1, opnd2; } ternary;
> > struct { gimple fn_from; bool pure; size_t nargs; tree *args; } call;
> > +struct { size_t nargs; tree *args; } phi;
> >   } ops;
> >  };
> >
> > @@ -281,6 +283,19 @@ initialize_hash_element (gimple stmt, tree lhs,
> >   expr->kind = EXPR_SINGLE;
> >   expr->ops.single.rhs = gimple_goto_dest (stmt);
> > }
> > +  else if (code == GIMPLE_PHI)
> > +{
> > +  size_t nargs = gimple_phi_num_args (stmt);
> > +  size_t i;
> > +
> > +  expr->type = TREE_TYPE (gimple_phi_result (stmt));
> > +  expr->kind = EXPR_PHI;
> > +  expr->ops.phi.nargs = nargs;
> > +  expr->ops.phi.args = (tree *) xcalloc (nargs, sizeof (tree));
> > +
> > +  for (i = 0; i < nargs; i++)
> > +expr->ops.phi.args[i] = gimple_phi_arg_def (stmt, i);
> > +}
> >   else
> > gcc_unreachable ();
> >
> > @@ -439,6 +454,21 @@ hashable_expr_equal_p (const struct hashable_expr
> > return true;
> >   }
> >
> > +case EXPR_PHI:
> > +  {
> > +size_t i;
> > +
> > +if (expr0->ops.phi.nargs !=  expr1->ops.phi.nargs)
> > +  return false;
> > +
> > +for (i = 0; i < expr0->ops.phi.nargs; i++)
> > +  if (! operand_equal_p (expr0->ops.phi.args[i],
> > + expr1->ops.phi.args[i], 0))
> > +return false;
> > +
> > +return true;
> > +  }
> > +
> > default:
> >   gcc_unreachable ();
> > }
> > @@ -516,6 +546,15 @@ iterative_hash_hashable_expr (const struct hashabl
> >   }
> >   break;
> >
> > +case EXPR_PHI:
> > +  {
> > +size_t i;
> > +
> > +for (i = 0; i < expr->ops.phi.nargs; i++)
> > +  val = iterative_hash_expr (expr->ops.phi.args[i], val);
> > +  }
> > +  break;
> > +
> > default:
> >   gcc_unreachable ();
> > }
> > @@ -588,6 +627,22 @@ print_expr_hash_elt (FILE * stream, const struct e
> >   fprintf (stream, ")");
> > }
> > break;
> > +
> > +  case EXPR_PHI:
> > +{
> > +  size_t i;
> > +  size_t nargs = element->expr.ops.phi.nargs;
> > +
> > +  fprintf (stream, "PHI <");
> > +  for (i = 0; i < nargs; i++)
> > +{
> > +  print_generic_expr (stream, element->expr.ops.phi.args[i], 
> > 0);
> > +  if (i + 1 < nargs)
> > +fprintf (stream, ", ");
> > +}
> > +  fprintf (stream, ">");
> > +}
> > +break;
> > }
> >   fprintf (stream, "\n");
> >
> > @@ -1688,6 +1743,10 @@ dom_opt_enter_block (struct dom_walk_data *walk_da
> >   /* PHI nodes can create equivalences too.  */
> >   record_equivalences_from_phis (bb);
> >
> > +  /* Create equivalences from redundant PHIs.  */
> > +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > +eliminate_redundant_computations (&gsi);
> > +
> >   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > optimize_s

Re: [lra] patch mostly implementing pseudo live range split

2011-12-02 Thread Hans-Peter Nilsson
On Tue, 29 Nov 2011, Vladimir Makarov wrote:
> 3. The patch rewrite the way of dealing with the secondary memory
>moves in constraint pass of LRA.  Previously we generated secondary
>moves if macro SECONDARY_MEMORY_NEEDED says so.  Unfortunately, the
>macro is usually defined inaccurately.

I do not doubt that, but I think it would help if you mentioned
what you see that is wrong, in particular if it's consistent
among targets.

For example, for MIPS (and I'd probably find the same for other targets
too if I looked) for an older gcc, I've seen calls with class ==
NO_REGS to the related function mips_secondary_reload_class due
to MEMORY_MOVE_COST applied to a constant, which becomes a bit
of a problem if it's used as-is as a first argument to
reg_class_subset_p (the empty class being a subset of every
class).

brgds, H-P


Re: [PATCH] Improve debug info if tree DCE removes stores (PR debug/50317)

2011-12-02 Thread Michael Matz
Hi,

On Fri, 2 Dec 2011, Richard Guenther wrote:

> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > 
> > 2011-12-01  Jakub Jelinek  
> > 
> > PR debug/50317
> > * tree-ssa-dce.c (remove_dead_stmt): Add a debug stmt when removing
> > as unnecessary a store to a variable with gimple reg type.
> > * tree-ssa-live.c (remove_unused_locals): Clear TREE_ADDRESSABLE bit
> > on local unreferenced variables.
> 
> This change seems wrong.  We are turning valid gimple
> 
> # DEBUG D#2 => transfer.0  [with address taken]
> 
> into invalid one
> 
> # DEBUG D#2 => transfer.0  [without address taken]

Why would it be invalid?  It's meaningful to talk about a full object and 
references to it even without having its address taken.  Normal 
loads/stores do the same.

> once you update that stmt with update_stmt you'll get an SSA operand
> for transfer.0

That's the thing which should be fixed then.

> Why do this in remove_unused_locals and not in update_address_taken?

Another walk over all statements just for this?  Meh.

> Or, why do it at all?

The debug machinery seems to be unhappy about variables that are address 
taken.

> I have a SSA operand checking patch that catches this now ...


Ciao,
Michael.


[PATCH] Re-write SSA operand checking

2011-12-02 Thread Richard Guenther

This rewrites SSA operand checking to basically check that the
operand status is the same as if you'd do an update_stmt on the
stmt.  The current checking code does not properly verify that
all stmts are properly updated as it misses missing and swapped
operands for example (bugs like that I've fixed in the past,
always required some major debugging).

This means that the operand scanner implementation is the
reference of what is supposed to be in the operand lists and
what not.  Thus, checking code and implementation will not
go out of sync.

Bootstrapped and tested on x86_64-unknown-linux-gnu.  It exposes

FAIL: gfortran.fortran-torture/compile/pr45738.f90,  -O3 -g   (internal 
compiler error)

which is a bug introduced by

2011-12-01  Jakub Jelinek  

PR debug/50317
* tree-ssa-live.c (remove_unused_locals): Clear TREE_ADDRESSABLE bit
on local unreferenced variables.

which produces invalid GIMPLE.

Richard.

2011-12-02  Richard Guenther  

* tree-ssa.c (verify_ssa): Verify SSA names in the loop
over all SSA names.  Remove SSA operand checking, call
verify_ssa_operands.
* tree-ssa-operands.h (verify_ssa_operands): Declare.
* tree-ssa-operands.c (verify_ssa_operands): New function.

Index: gcc/tree-ssa.c
===
*** gcc/tree-ssa.c  (revision 181902)
--- gcc/tree-ssa.c  (working copy)
*** verify_ssa (bool check_modified_stmt)
*** 933,938 
--- 933,940 
  gimple stmt;
  TREE_VISITED (name) = 0;
  
+ verify_ssa_name (name, !is_gimple_reg (name));
+ 
  stmt = SSA_NAME_DEF_STMT (name);
  if (!gimple_nop_p (stmt))
{
*** verify_ssa (bool check_modified_stmt)
*** 982,990 
{
  gimple stmt = gsi_stmt (gsi);
  use_operand_p use_p;
- bool has_err;
- int count;
- unsigned i;
  
  if (check_modified_stmt && gimple_modified_p (stmt))
{
--- 984,989 
*** verify_ssa (bool check_modified_stmt)
*** 994,1082 
  goto err;
}
  
! if (is_gimple_assign (stmt)
! && TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
!   {
! tree lhs, base_address;
! 
! lhs = gimple_assign_lhs (stmt);
! base_address = get_base_address (lhs);
! 
! if (base_address
! && SSA_VAR_P (base_address)
! && !gimple_vdef (stmt)
! && optimize > 0)
!   {
! error ("statement makes a memory store, but has no VDEFS");
! print_gimple_stmt (stderr, stmt, 0, TDF_VOPS);
! goto err;
!   }
!   }
! else if (gimple_debug_bind_p (stmt)
!  && !gimple_debug_bind_has_value_p (stmt))
!   continue;
! 
! /* Verify the single virtual operand and its constraints.  */
! has_err = false;
! if (gimple_vdef (stmt))
!   {
! if (gimple_vdef_op (stmt) == NULL_DEF_OPERAND_P)
!   {
! error ("statement has VDEF operand not in defs list");
! has_err = true;
!   }
! if (!gimple_vuse (stmt))
!   {
! error ("statement has VDEF but no VUSE operand");
! has_err = true;
!   }
! else if (SSA_NAME_VAR (gimple_vdef (stmt))
!  != SSA_NAME_VAR (gimple_vuse (stmt)))
!   {
! error ("VDEF and VUSE do not use the same symbol");
! has_err = true;
!   }
! has_err |= verify_ssa_name (gimple_vdef (stmt), true);
!   }
! if (gimple_vuse (stmt))
{
! if  (gimple_vuse_op (stmt) == NULL_USE_OPERAND_P)
!   {
! error ("statement has VUSE operand not in uses list");
! has_err = true;
!   }
! has_err |= verify_ssa_name (gimple_vuse (stmt), true);
!   }
! if (has_err)
!   {
! error ("in statement");
! print_gimple_stmt (stderr, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
  goto err;
}
  
! count = 0;
! FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE|SSA_OP_DEF)
!   {
! if (verify_ssa_name (op, false))
!   {
! error ("in statement");
! print_gimple_stmt (stderr, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
! goto err;
!   }
! count++;
!   }
! 
! for (i = 0; i < gimple_num_ops (stmt); i++)
!   {
! op = gimple_op (stmt, i);
! if (op && TREE_CODE (op) == SSA_NAME && --count < 0)
!   {
! error ("number of operands and imm-links don%'t agree"
!   

Re: [PATCH] Improve debug info if tree DCE removes stores (PR debug/50317)

2011-12-02 Thread Richard Guenther
On Thu, 1 Dec 2011, Jakub Jelinek wrote:

> Hi!
> 
> As discussed in the PR, in 4.7 we regressed some GDB testcases, because
> previously unused addressable vars were first previously optimized into
> non-addressable and only afterwards removed (which results in correct debug
> stmts covering those assignments), but after some recent changes it is
> CDDCE that removes them before they are update_address_taken optimized.
> 
> In the PR I've offered a patch to schedule another update_address_taken
> pass before first cddce, but Michael is right that perhaps some other
> DCE pass could have similar issue.
> 
> So this patch instead, if the DCEd var stores have addressable lhs, but
> with is_gimple_reg_type types, we add debug stmts even for them.
> Such variables aren't target_for_debug_bind though, which breaks
> var-tracking.  So, the patch if all occurrences of the var are optimized
> away, just clears TREE_ADDRESSABLE bit like update_address_taken would,
> and, if that didn't happen until expansion, just ignores those debug
> stmts so that var-tracking isn't upset by seeing non-tracked vars in debug
> insns.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2011-12-01  Jakub Jelinek  
> 
>   PR debug/50317
>   * tree-ssa-dce.c (remove_dead_stmt): Add a debug stmt when removing
>   as unnecessary a store to a variable with gimple reg type.
>   * tree-ssa-live.c (remove_unused_locals): Clear TREE_ADDRESSABLE bit
>   on local unreferenced variables.

This change seems wrong.  We are turning valid gimple

# DEBUG D#2 => transfer.0  [with address taken]

into invalid one

# DEBUG D#2 => transfer.0  [without address taken]

once you update that stmt with update_stmt you'll get an SSA operand
for transfer.0 which is not in SSA form because you fail to rewrite it
into SSA form.

Why do this in remove_unused_locals and not in update_address_taken?
Or, why do it at all?

I have a SSA operand checking patch that catches this now ...

Thanks,
Richard.


>   * cfgexpand.c (expand_gimple_basic_block): Don't emit DEBUG_INSNs
>   for !target_for_debug_bind variables.
> 
> --- gcc/tree-ssa-live.c.jj2011-11-28 15:41:46.376749700 +0100
> +++ gcc/tree-ssa-live.c   2011-12-01 12:04:12.920595572 +0100
> @@ -1,5 +1,5 @@
>  /* Liveness for SSA trees.
> -   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010
> +   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
> Free Software Foundation, Inc.
> Contributed by Andrew MacLeod 
>  
> @@ -814,7 +814,15 @@ remove_unused_locals (void)
> bitmap_set_bit (global_unused_vars, DECL_UID (var));
>   }
> else
> - continue;
> + {
> +   /* For unreferenced local vars drop TREE_ADDRESSABLE
> +  bit in case it is referenced from debug stmts.  */
> +   if (DECL_CONTEXT (var) == current_function_decl
> +   && TREE_ADDRESSABLE (var)
> +   && is_gimple_reg_type (TREE_TYPE (var)))
> + TREE_ADDRESSABLE (var) = 0;
> +   continue;
> + }
>   }
>else if (TREE_CODE (var) == VAR_DECL
>  && DECL_HARD_REGISTER (var)
> --- gcc/tree-ssa-dce.c.jj 2011-11-28 15:41:46.376749700 +0100
> +++ gcc/tree-ssa-dce.c2011-12-01 12:04:12.920595572 +0100
> @@ -1215,6 +1215,26 @@ remove_dead_stmt (gimple_stmt_iterator *
> ei_next (&ei);
>  }
>  
> +  /* If this is a store into a variable that is being optimized away,
> + add a debug bind stmt if possible.  */
> +  if (MAY_HAVE_DEBUG_STMTS
> +  && gimple_assign_single_p (stmt)
> +  && is_gimple_val (gimple_assign_rhs1 (stmt)))
> +{
> +  tree lhs = gimple_assign_lhs (stmt);
> +  if ((TREE_CODE (lhs) == VAR_DECL || TREE_CODE (lhs) == PARM_DECL)
> +   && !DECL_IGNORED_P (lhs)
> +   && is_gimple_reg_type (TREE_TYPE (lhs))
> +   && !is_global_var (lhs)
> +   && !DECL_HAS_VALUE_EXPR_P (lhs))
> + {
> +   tree rhs = gimple_assign_rhs1 (stmt);
> +   gimple note
> + = gimple_build_debug_bind (lhs, unshare_expr (rhs), stmt);
> +   gsi_insert_after (i, note, GSI_SAME_STMT);
> + }
> +}
> +
>unlink_stmt_vdef (stmt);
>gsi_remove (i, true);
>release_defs (stmt);
> --- gcc/cfgexpand.c.jj2011-12-01 11:44:56.156345109 +0100
> +++ gcc/cfgexpand.c   2011-12-01 12:37:57.764791257 +0100
> @@ -3903,6 +3903,11 @@ expand_gimple_basic_block (basic_block b
> rtx val;
> enum machine_mode mode;
>  
> +   if (TREE_CODE (var) != DEBUG_EXPR_DECL
> +   && TREE_CODE (var) != LABEL_DECL
> +   && !target_for_debug_bind (var))
> + goto delink_debug_stmt;
> +
> if (gimple_debug_bind_has_value_p (stmt))
>   value = gimple_debug_bind_get_value (stmt);
> else
> @@ -3932,6 +3937,7 @@ expand_gimple_basic_block (basic_block b
> PAT_VAR_LOCATION_LOC (val) = (rtx)value;
>

Re: Adjust omp-low test for alignment

2011-12-02 Thread Hans-Peter Nilsson
On Tue, 29 Nov 2011, Hans-Peter Nilsson wrote:

> On Tue, 29 Nov 2011, Richard Henderson wrote:
> > On 11/28/2011 08:49 PM, Hans-Peter Nilsson wrote:
> > > On Sat, 26 Nov 2011, Richard Henderson wrote:
> > >> The m68k-linux failure for the various omp atomic tests
> > >> is due to the fact that BIGGEST_ALIGNMENT is 16 bits on
> > >> that platform.  I think it's pretty reasonable to assume
> > >> that if something is aligned to BIGGEST_ALIGNMENT, then
> > >> it can be considered "aligned".
> > >
> > > BIGGEST_ALIGNMENT means aligned enough for normal access, but
> > > not necessarily for atomic access.
> >
> > If that's true,
>
> It's what that macro meant up until gcc started to be
> atomicity-aware at this level, as implied by "when violated, may
> cause a fault".  Changing it to higher makes gcc do all stupid
> things when accessing structure members with lower alignment so
> I can't do that, it violates the byte-alignment ABI.
>
> > then you'll have problems applying any of these
> > functions without additional source-code level alignment, everywhere.
>
> There has to be a type that matches the (let's call it)
> ATOMIC_ALIGNMENT yes, is that what you mean by "any of these
> functions"?

Oh, on second reading I see you probably mean I have to make
sure the atomic types are aligned in the library, by e.g.
attaching __attribute__ ((__aligned__)).  Sure: the reply to
this change in the gut of gcc is however more important to make
sure it's not cast in stone and copied to other places that I'll
only find the hard way.  BTW, on the topic, I cringe whenever I
see futexes expressed as plain "int", they absolutely have to
have at least natural alignment which is not always true e.g. in
structs.  People, please keep the atomic types
target-overridable in libraries.

> > > Not that OMP support is imminent or critical for cris-linux or
> > > anything, but can we have a new macro?
> >
> > I'm not sure what you're suggesting that the macro actually do.
>
> Tell proper alignment for atomic access, defaulting to (say)
> natural alignment.
>
> brgds, H-P
>


Re: SH atomic asms in glibc and the stack pointer

2011-12-02 Thread Kaz Kojima
Ulrich Drepper  wrote:
> Has the gcc patch been committed?

Yes, it has been committed as revision 181825 on gcc trunk.

Regards,
kaz


RFC: ARM 64-bit shifts in NEON

2011-12-02 Thread Andrew Stubbs

Hi All,

I'm trying to implement DImode shifts using ARM NEON instructions. This 
wouldn't be difficult in itself, but making it play nice with the 
existing implementation is causing me problems. I'd like a few 
suggestions/pointers/comments to help me get this right, please.


The existing shift mechanisms must be kept, partly because the NEON unit 
is optional, and partly because it does not permit the full range of 
DImode operations, so sometimes it's more efficient to do 64-bit 
operations in core-registers, rather than copy all the values over to 
NEON, do the operation, and move the result back. Which set of patterns 
are used is determined by the register allocator and its costs mechanism.


The late decision means that the patterns may only use the post-reload 
splitter, and so cannot rely on many of the usual passes to sort out 
inefficiencies. In particular, the lack of combine makes it hard to 
detect and optimize extend-and-copy sequences.


So, I've attached two patches. The first is neon-shifts.patch, and does 
most of the work. The second is extendsidi2_neon.patch, and is intended 
to aid moving the shift amount from SImode registers, but doesn't go as 
far as I'd like.


I've not actually tested any of the output code just yet, so there may 
be logic errors, but those are easily fixed later, and what I'm trying 
to get right here is the GCC machine description.


Given this testcase:

   void
   f (long long *a, int b)
   {
 *a = *a << b;
   }

Without any patches, GCC gives this output, using only ARM core 
registers (in thumb2 mode):


   f:
        ldr     r2, [r0, #0]
        ldr     r3, [r0, #4]
        push    {r4, r5, r6}
        rsb     r6, r1, #32
        sub     r4, r1, #32
        lsrs    r6, r2, r6
        lsls    r5, r2, r4
        lsls    r3, r3, r1
        lsls    r1, r2, r1
        orrs    r3, r3, r6
        str     r1, [r0, #0]
        ands    r4, r3, r4, asr #32
        it      cc
        movcc   r4, r5
        str     r4, [r0, #4]
        pop     {r4, r5, r6}
        bx      lr

With just neon-shifts.patch, we get this output, now with NEON shifts:

f:
        fldd    d17, [r0, #0]   @ int
        mov     r2, r1
        movs    r3, #0
        push    {r4, r5}
        fmdrr   d18, r2, r3     @ int
        vshl.i64        d16, d17, d18
        fstd    d16, [r0, #0]   @ int
        pop     {r4, r5}
        bx      lr


As you can see, the shift is much improved, but the shift amount is 
first extended into two SImode registers, and then moved to a NEON 
DImode register, which increases core-register pressure unnecessarily.


With both patches, we now get this:

f:
        fldd    d17, [r0, #0]   @ int
        vdup.32 d16, r1
        vshr.u64        d16, d16, #32   <-- still unnecessary
        vshl.i64        d16, d17, d16
        fstd    d16, [r0, #0]   @ int
        bx      lr

Now the value is copied and then extended. I have chosen to use vdup.32 
instead of vmov.i32 because the latter can only target half the DImode 
registers. The right shift is necessary for a general zero-extend, but 
is not useful in this case as only the bottom 8 bits are interesting, 
and vdup has already done the right thing.


Note that the examples I've given are for left shifts. Right shifts are 
also implemented, but are a little more complicated (in the 
shift-by-register case) because the shift must be implemented as a left 
shift by a negative amount, and so an unspec is used to prevent the 
compiler doing anything 'clever'. Apart from an extra negation, the end 
result is much the same, but the patterns look different.



All this is a nice improvement, but I'm not happy:

1. The post-reload split means that I've had to add a clobber for CC to 
all the patterns, even though only some of them really need it. I think 
I've convinced myself that this is ok because it doesn't matter before 
scheduling, and after splitting the clobbers are only retained if 
they're really needed, but it still feels wrong.


2. The extend optimization is fine for general case extends, but it can 
be improved for the shift-amount case because we actually only need the 
bottom 8 bits, as indicated above. The problem is that there's no 
obvious way to achieve this:
   - there's no combine pass after this point, so a pattern that 
recognises and re-splits the extend, move and shift can't be used.
   - I don't believe there can be a pattern that uses SImode for the 
shift amount because the value needs to be in a DImode register 
eventually, and that means one needs to have been allocated before it 
gets split, and that means the extend needs to be separate.


3. The type of the shift-amount is determined by the type used in the 
ashldi3 pattern, and that uses SImode. This is fine for values already 
in SImode registers (probably the common case), but means that values 
already in DImode registers will have to get truncated and then 
re-extended, and this is not an operation that can generall

Re: SH atomic asms in glibc and the stack pointer

2011-12-02 Thread Ulrich Drepper
On Tue, Nov 29, 2011 at 17:44, Kaz Kojima  wrote:
> Uli, could you please approve the libc patch?

Has the gcc patch been committed?


[PATCH] Fix varpool handling of register variables (PR47259)

2011-12-02 Thread Richard Guenther

This fixes a recent regression on the testcase of PR47259 where
we bring local a global register variable during WPA time and
then mangle its register name spec during ltrans.  Oops.
It doesn't make sense to bring "local" global register variables.

Fixed by making varpool_externally_visible_p return true
for all DECL_HARD_REGISTER decls.

Honza, does this look like the correct fix?  Or should we represent
global register vars differently in the varpool?  Any idea what
caused this to regress?

So far tested on the testcase only.

Thanks,
Richard.

2011-12-02  Richard Guenther  

PR lto/47259
* ipa.c (varpool_externally_visible_p): Register variables
are always externally visible.

Index: gcc/ipa.c
===
--- gcc/ipa.c   (revision 181902)
+++ gcc/ipa.c   (working copy)
@@ -662,6 +662,8 @@ varpool_externally_visible_p (struct var
   if (varpool_used_from_object_file_p (vnode))
 return true;
 
+  if (DECL_HARD_REGISTER (vnode->decl))
+return true;
   if (DECL_PRESERVE_P (vnode->decl))
 return true;
   if (lookup_attribute ("externally_visible",


Re: [PATCH] Implement stap probe on ARM's unwinder

2011-12-02 Thread Bernd Schmidt
On 12/01/11 13:01, Ramana Radhakrishnan wrote:
> Sergio: Other than a few minor tweaks to the Changelog it largely
> looks obvious to me.
> 
> Bernd, could you take another look at this since this is now shared
> with the c6x backend ?

Doesn't look like it would cause problems. I have no idea what
builtin_frob_return_addr does but it appears to exist everywhere.


Bernd


Re: [STORMY16] Hookize FUNCTION_VALUE_REGNO_P

2011-12-02 Thread Richard Earnshaw
On 30/11/11 19:19, Anatoly Sokolov wrote:
> Hello.
> 
>   This patch removes obsolete FUNCTION_VALUE_REGNO_P macro from ARM back end 
> in the GCC and introduces equivalent TARGET_FUNCTION_VALUE_REGNO_P target 
> hook.
> 
>   Since the LIBCALL_VALUE macro should be poisoned in the future, this patch 
> replaces it with the arm_libcall_value_1 function.
> 
>   Bootstrapped and regression tested on arm-unknown-linux-gnueabi.
> 
> * config/arm/arm.h (LIBCALL_VALUE, FUNCTION_VALUE_REGNO_P): Remove.
> * config/arm/arm-protos.h (aapcs_libcall_value): Remove.
> * config/arm/arm.c (TARGET_FUNCTION_VALUE_REGNO_P): Define.
> (arm_libcall_value_1, arm_function_value_regno_p): New function.
> (arm_function_value, arm_libcall_value): Use arm_libcall_value_1.
> (aapcs_libcall_value): Make static.
> (arm_libcall_value): Add static qualifier
> 

OK

R.



Re: [Patch] Fix Bug 51162

2011-12-02 Thread Sameera Deshpande
On Wed, 2011-11-30 at 19:43 +, Jason Merrill wrote:
> On 11/24/2011 05:42 AM, Sameera Deshpande wrote:
> > -  if (TREE_CODE (fn) == ADDR_EXPR)
> > +  if (fn != NULL && TREE_CODE (fn) == ADDR_EXPR)
> >  fn = TREE_OPERAND (fn, 0);
> > -  if (TREE_CODE (fn) == FUNCTION_DECL && decl_is_tm_clone (fn))
> > +  if (fn != NULL && TREE_CODE (fn) == FUNCTION_DECL && decl_is_tm_clone 
> > (fn))
> >  pp_string (buffer, " [tm-clone]");
> > -  if (TREE_CODE (fn) == FUNCTION_DECL
> > +  if (fn != NULL
> 
> I'd rather not add the null check so many times.  How about just 
> returning if fn is null?
> 
> Jason
> 

Jason,

Thanks for your comment.
Please find attached reworked patch returning if fn is NULL.

the patch is tested with check-gcc for ARM.

-- diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index f0e7c50..3b5f670 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -698,6 +698,9 @@ dump_gimple_call (pretty_printer *buffer, gimple gs, int spc, int flags)
   if (gimple_call_tail_p (gs))
 pp_string (buffer, " [tail call]");
 
+  if (fn == NULL)
+return;
+
   /* Dump the arguments of _ITM_beginTransaction sanely.  */
   if (TREE_CODE (fn) == ADDR_EXPR)
 fn = TREE_OPERAND (fn, 0);

Re: [PATCH] Remove dead labels to increase superblock scope

2011-12-02 Thread Richard Sandiford
Tom de Vries  writes:
> On 27/11/11 23:59, Eric Botcazou wrote:
>>> No, DELETED_LABEL notes still work just fine. It depends on how you
>>> remove the label and replace it with a note, and Tom isn't showing
>>> what he did, so...
>> 
>> I agree that there is no obvious reason why just calling delete_insn would 
>> not 
>> work, so this should be investigated first.
>> 
>
> The reason it didn't work, is because after turning a label into a
> NOTE_INSN_DELETED_LABEL, one needs to move it to after the 
> NOTE_INSN_BASIC_BLOCK
> as in cfgcleanup.c:try_optimize_cfg():
> ...
> delete_insn_chain (label, label, false);
> /* If the case label is undeletable, move it after the
>BASIC_BLOCK note.  */
> if (NOTE_KIND (BB_HEAD (b)) == NOTE_INSN_DELETED_LABEL)
>   {
> rtx bb_note = NEXT_INSN (BB_HEAD (b));
>
> reorder_insns_nobb (label, label, bb_note);
> BB_HEAD (b) = bb_note;
> if (BB_END (b) == bb_note)
>   BB_END (b) = label;
>   }
> ...
>
> Attached patch factors out this piece of code and reuses it in 
> fixup_reorder_chain.

But isn't...

> @@ -2637,15 +2658,7 @@ try_optimize_cfg (int mode)
> delete_insn_chain (label, label, false);
> /* If the case label is undeletable, move it after the
>BASIC_BLOCK note.  */
> -   if (NOTE_KIND (BB_HEAD (b)) == NOTE_INSN_DELETED_LABEL)
> - {
> -   rtx bb_note = NEXT_INSN (BB_HEAD (b));
> -
> -   reorder_insns_nobb (label, label, bb_note);
> -   BB_HEAD (b) = bb_note;
> -   if (BB_END (b) == bb_note)
> - BB_END (b) = label;
> - }
> +   fixup_deleted_label (b);

...this "delete_insn_chain (label, label, false);" call equivalent
to "delete_insn (label)"?  Splitting the operation in two here and:

> Index: gcc/cfglayout.c
> ===
> --- gcc/cfglayout.c (revision 181652)
> +++ gcc/cfglayout.c (working copy)
> @@ -857,6 +857,12 @@ fixup_reorder_chain (void)
>  (e_taken->src, e_taken->dest));
> e_taken->flags |= EDGE_FALLTHRU;
> update_br_prob_note (bb);
> +   if (LABEL_NUSES (ret_label) == 0
> +   && single_pred_p (e_taken->dest))
> + {
> +   delete_insn (ret_label);
> +   fixup_deleted_label (e_taken->dest);
> + }

...here seems a little odd.

Richard


Re: Ping Re: Fix doloop bug with maximum-length loops

2011-12-02 Thread Richard Guenther
On Fri, Dec 2, 2011 at 3:28 AM, Andrew Pinski  wrote:
> On Thu, Dec 1, 2011 at 6:08 PM, Joseph S. Myers  
> wrote:
>> Ping.  This patch
>>  is pending
>> review.
>>
>
> From my point of view, reverting my patch is fine as the testcase
> which I was trying to optimize was not even optimized on the trunk
> after this patch anyways.

Thus, it's ok.

Thanks,
Richard.

> Thanks,
> Andrew Pinski


Re: [PATCH] Fix early inliner inlining uninlinable functions

2011-12-02 Thread Richard Guenther
On Fri, 2 Dec 2011, Jan Hubicka wrote:

> > 
> > Sure, but then you can still have the issue of an inconsistency.
> > Thus, would you then remove the remaining asserts?
> > 
> > I believe in the end the proper fix is to _not_ throw away
> > cgraph edges all the time, but keep them up-to-date and thus
> > make the stmt flag not necessary.  (we can define "up-to-date"
> > in a way so that we only require that existing edges that
> > still have a call stmt have to be valid, thus still require
> > incremental recomputation to remove dead edges and create
> > new ones)
> 
> Well, the stmt flag always looked redundant to me. Why don't we just initialize
> the edge flag at cgraph construction time? We do have the statement then.

We've had the stmt flag because the gimplifier computed uninlinability
and stuck it on the CALL_EXPR tree, then transitioned it to the
gimple stmt.  We no longer do that, so yes - I'll prepare a patch to
kill all this after Diego committed his patch ;)

Richard.


Re: PR libgomp/51376 fix

2011-12-02 Thread Alan Modra
On Thu, Dec 01, 2011 at 12:36:08PM +0100, Jakub Jelinek wrote:
> On Thu, Dec 01, 2011 at 09:58:08PM +1030, Alan Modra wrote:
> > The GOMP_task change fixes a similar potential problem.  Bootstrapped
> > and regression tested powerpc-linux.  OK to apply?
> > 
> > PR libgomp/51376
> > * task.c (GOMP_taskwait): Don't access task->children outside of
> > task_lock mutex region.
> > (GOMP_task): Likewise.
> 
> Can't this be solved just by adding a barrier?  The access to the var
> outside of the lock has been quite intentional, to avoid locking in the
> common case where there are no children.

No, I tried that and the task-6.C testcase still failed although not
quite as readily.  I was using

if (task == NULL
|| __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == 0)

You need a release in the child as well as the acquire to ensure
proper synchronisation, and there's a window for failure between the
child clearing task->children and performing a release as part of the
mutex unlock.

Oops, on looking at this email I see I attached an old patch..  This
one avoids a segfault on trying to lock team->task_lock when there
is no team.  This one really has been bootstrapped and regression
tested successfully.

PR libgomp/51376
* task.c (GOMP_taskwait): Don't access task->children outside of
task_lock mutex region.
(GOMP_task): Likewise.

Index: libgomp/task.c
===
--- libgomp/task.c  (revision 181902)
+++ libgomp/task.c  (working copy)
@@ -116,10 +116,11 @@ GOMP_task (void (*fn) (void *), void *da
}
   else
fn (data);
-  if (task.children)
+  if (team != NULL)
{
  gomp_mutex_lock (&team->task_lock);
- gomp_clear_parent (task.children);
+ if (task.children != NULL)
+   gomp_clear_parent (task.children);
  gomp_mutex_unlock (&team->task_lock);
}
   gomp_end_task ();
@@ -290,8 +291,9 @@ GOMP_taskwait (void)
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
 
-  if (task == NULL || task->children == NULL)
+  if (task == NULL || team == NULL)
 return;
+
   gomp_mutex_lock (&team->task_lock);
   while (1)
 {

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] PR c++/51289 - ICE with alias template for bound template

2011-12-02 Thread Dodji Seketeli
Dodji Seketeli  writes:

> Jason Merrill  writes:
>
>> I guess let's check DECL_ORIGINAL_TYPE instead of TREE_TYPE for alias
>> templates.
>
> Like the below that I am currently bootstrapping?

Finally this is what passed bootstrap and testing on
x86_64-unknown-linux-gnu against trunk.

From: Dodji Seketeli 
Date: Sat, 26 Nov 2011 11:50:43 +0100
Subject: [PATCH] PR c++/51289 - ICE with alias template for bound template
 template parm

gcc/cp/

PR c++/51289
* cp-tree.h (TYPE_TEMPLATE_INFO): Rewrite this accessor macro to
better support aliased types.
(TYPE_ALIAS_P): Don't crash on TYPE_NAME nodes that are not
TYPE_DECL.
* pt.c (find_parameter_packs_r): Handle type aliases.
(push_template_decl_real): Check for bare parameter packs in the
underlying type of an alias template.

gcc/PR51289/gcc/testsuite/

PR c++/51289
* g++.dg/cpp0x/alias-decl-17.C: New test.
---
 gcc/cp/cp-tree.h   |   28 ++--
 gcc/cp/pt.c|   19 ++-
 gcc/testsuite/g++.dg/cpp0x/alias-decl-17.C |   21 +
 3 files changed, 57 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-17.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 3f4f408..b821928 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -2553,6 +2553,7 @@ extern void decl_shadowed_for_var_insert (tree, tree);
 #define TYPE_ALIAS_P(NODE) \
   (TYPE_P (NODE)   \
&& TYPE_NAME (NODE) \
+   && TREE_CODE (TYPE_NAME (NODE)) == TYPE_DECL\
&& TYPE_DECL_ALIAS_P (TYPE_NAME (NODE)))
 
 /* For a class type: if this structure has many fields, we'll sort them
@@ -2605,17 +2606,24 @@ extern void decl_shadowed_for_var_insert (tree, tree);
   (LANG_TYPE_CLASS_CHECK (BOUND_TEMPLATE_TEMPLATE_PARM_TYPE_CHECK (NODE)) \
->template_info)
 
-/* Template information for an ENUMERAL_, RECORD_, or UNION_TYPE.  */
+/* Template information for an ENUMERAL_, RECORD_, UNION_TYPE, or
+   BOUND_TEMPLATE_TEMPLATE_PARM type.  Note that if NODE is a
+   specialization of an alias template, this accessor returns the
+   template info for the alias template, not the one (if any) for the
+   template of the underlying type.  */
 #define TYPE_TEMPLATE_INFO(NODE)   \
-  (TREE_CODE (NODE) == ENUMERAL_TYPE   \
-   ? ENUM_TEMPLATE_INFO (NODE) :   \
-   (TREE_CODE (NODE) == BOUND_TEMPLATE_TEMPLATE_PARM   \
-? TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO (NODE) :\
-((CLASS_TYPE_P (NODE) && !TYPE_ALIAS_P (NODE)) \
- ? CLASSTYPE_TEMPLATE_INFO (NODE)  \
- : ((TYPE_NAME (NODE) && DECL_LANG_SPECIFIC (TYPE_NAME (NODE)))\
-   ? (DECL_TEMPLATE_INFO (TYPE_NAME (NODE)))   \
-   : NULL_TREE
+  (TYPE_ALIAS_P (NODE) \
+   ? ((TYPE_NAME (NODE) && DECL_LANG_SPECIFIC (TYPE_NAME (NODE)))  \
+  ? DECL_TEMPLATE_INFO (TYPE_NAME (NODE))  \
+  : NULL_TREE) \
+   : ((TREE_CODE (NODE) == ENUMERAL_TYPE)  \
+  ? ENUM_TEMPLATE_INFO (NODE)  \
+  : ((TREE_CODE (NODE) == BOUND_TEMPLATE_TEMPLATE_PARM)\
+? TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO (NODE)  \
+: (CLASS_TYPE_P (NODE) \
+   ? CLASSTYPE_TEMPLATE_INFO (NODE)\
+   : NULL_TREE
+
 
 /* Set the template information for an ENUMERAL_, RECORD_, or
UNION_TYPE to VAL.  */
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 4725080..ee3a3ab 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -2976,6 +2976,20 @@ find_parameter_packs_r (tree *tp, int *walk_subtrees, 
void* data)
 (struct find_parameter_pack_data*)data;
   bool parameter_pack_p = false;
 
+  /* Handle type aliases/typedefs.  */
+  if (TYPE_P (t)
+  && TYPE_NAME (t)
+  && TREE_CODE (TYPE_NAME (t)) == TYPE_DECL
+  && TYPE_DECL_ALIAS_P (TYPE_NAME (t)))
+{
+  if (TYPE_TEMPLATE_INFO (t))
+   cp_walk_tree (&TYPE_TI_ARGS (t),
+ &find_parameter_packs_r,
+ ppd, ppd->visited);
+  *walk_subtrees = 0;
+  return NULL_TREE;
+}
+
   /* Identify whether this is a parameter pack or not.  */
   switch (TREE_CODE (t))
 {
@@ -4905,7 +4919,10 @@ push_template_decl_real (tree decl, bool is_friend)
   if (check_for_bare_parameter_packs (TYPE_RAISES_EXCEPTIONS (type)))
TYPE_RAISES_EXCEPTIONS (type) = NULL_TREE;
 }
-  else if (check_for_bare_parameter_packs (TREE_TYPE (decl)))
+  else if (chec

Re: Fix doloop bug with maximum-length loops

2011-12-02 Thread Richard Sandiford
Sorry for the slow response, still catching up.

"Joseph S. Myers"  writes:
> This code for doing the increment in from_mode comes from the fix for
> PR 37451 and the follow-up fix for PR 37782
> 
> .  As far as
> I can tell the idea of those changes - which were an attempt to
> improve optimization - is simply broken when the loop might have
> maximum length like this (which in the original PR 37451 case it
> can't, but telling that in this code would be nontrivial) - including
> the case of nonconstant length as well as that of constant length.
>
> So this patch reverts both those previous patches and adds testcases
> to demonstrate the problem they caused.  Bootstrapped with no
> regressions on powerpc-linux-gnu.  OK to commit?

Yeah, I agree that's the best way out.

> (If the patch holds up on trunk I'd propose it for 4.6 and 4.5 branches as 
> well, as a wrong-code regression fix.)

OK for all three unless a release manager objects.

Richard


Re: [PATCH] Fix PR middle-end/39976, 200.sixtrack degradation

2011-12-02 Thread Richard Guenther
On Thu, Dec 1, 2011 at 11:13 PM, William J. Schmidt
 wrote:
> Greetings,
>
> Bug 39976 reported a degradation to 200.sixtrack wherein a hot
> single-block loop is broken into two blocks.  Investigation showed the
> cause to be a redundant PHI statement in the block, which the
> tree-outof-ssa logic doesn't handle well.  Currently we don't have code
> following the introduction of the redundant PHI that can clean it up.
>
> This patch modifies the dom pass to include redundant PHIs in the logic
> that removes redundant computations.  With the patch applied, the extra
> block is no longer created and the 200.sixtrack degradation is removed.
> This improves its performance by 7.3% on PowerPC64 32-bit and by 5.0% on
> PowerPC64 64-bit.
>
> Bootstrapped and regtested on powerpc64-linux.  OK for trunk?
>
> Thanks,
> Bill
>
>
> 2011-11-29  Bill Schmidt  
>
>        PR middle-end/39976
>        * tree-ssa-dom.c (enum expr_kind): Add EXPR_PHI.
>        (struct hashable_expr): Add struct phi field.
>        (initialize_hash_element): Handle phis.
>        (hashable_expr_equal_p): Likewise.
>        (iterative_hash_hashable_expr): Likewise.
>        (print_expr_hash_elt): Likewise.
>        (dom_opt_enter_block): Create equivalences from redundant phis.
>        (eliminate_redundant_computations): Handle redundant phis.
>
>
> Index: gcc/tree-ssa-dom.c
> ===
> --- gcc/tree-ssa-dom.c  (revision 181501)
> +++ gcc/tree-ssa-dom.c  (working copy)
> @@ -52,7 +52,8 @@ enum expr_kind
>   EXPR_UNARY,
>   EXPR_BINARY,
>   EXPR_TERNARY,
> -  EXPR_CALL
> +  EXPR_CALL,
> +  EXPR_PHI
>  };
>
>  struct hashable_expr
> @@ -65,6 +66,7 @@ struct hashable_expr
>     struct { enum tree_code op;  tree opnd0, opnd1; } binary;
>     struct { enum tree_code op;  tree opnd0, opnd1, opnd2; } ternary;
>     struct { gimple fn_from; bool pure; size_t nargs; tree *args; } call;
> +    struct { size_t nargs; tree *args; } phi;
>   } ops;
>  };
>
> @@ -281,6 +283,19 @@ initialize_hash_element (gimple stmt, tree lhs,
>       expr->kind = EXPR_SINGLE;
>       expr->ops.single.rhs = gimple_goto_dest (stmt);
>     }
> +  else if (code == GIMPLE_PHI)
> +    {
> +      size_t nargs = gimple_phi_num_args (stmt);
> +      size_t i;
> +
> +      expr->type = TREE_TYPE (gimple_phi_result (stmt));
> +      expr->kind = EXPR_PHI;
> +      expr->ops.phi.nargs = nargs;
> +      expr->ops.phi.args = (tree *) xcalloc (nargs, sizeof (tree));
> +
> +      for (i = 0; i < nargs; i++)
> +        expr->ops.phi.args[i] = gimple_phi_arg_def (stmt, i);
> +    }
>   else
>     gcc_unreachable ();
>
> @@ -439,6 +454,21 @@ hashable_expr_equal_p (const struct hashable_expr
>         return true;
>       }
>
> +    case EXPR_PHI:
> +      {
> +        size_t i;
> +
> +        if (expr0->ops.phi.nargs !=  expr1->ops.phi.nargs)
> +          return false;
> +
> +        for (i = 0; i < expr0->ops.phi.nargs; i++)
> +          if (! operand_equal_p (expr0->ops.phi.args[i],
> +                                 expr1->ops.phi.args[i], 0))
> +            return false;
> +
> +        return true;
> +      }
> +
>     default:
>       gcc_unreachable ();
>     }
> @@ -516,6 +546,15 @@ iterative_hash_hashable_expr (const struct hashabl
>       }
>       break;
>
> +    case EXPR_PHI:
> +      {
> +        size_t i;
> +
> +        for (i = 0; i < expr->ops.phi.nargs; i++)
> +          val = iterative_hash_expr (expr->ops.phi.args[i], val);
> +      }
> +      break;
> +
>     default:
>       gcc_unreachable ();
>     }
> @@ -588,6 +627,22 @@ print_expr_hash_elt (FILE * stream, const struct e
>           fprintf (stream, ")");
>         }
>         break;
> +
> +      case EXPR_PHI:
> +        {
> +          size_t i;
> +          size_t nargs = element->expr.ops.phi.nargs;
> +
> +          fprintf (stream, "PHI <");
> +          for (i = 0; i < nargs; i++)
> +            {
> +              print_generic_expr (stream, element->expr.ops.phi.args[i], 0);
> +              if (i + 1 < nargs)
> +                fprintf (stream, ", ");
> +            }
> +          fprintf (stream, ">");
> +        }
> +        break;
>     }
>   fprintf (stream, "\n");
>
> @@ -1688,6 +1743,10 @@ dom_opt_enter_block (struct dom_walk_data *walk_da
>   /* PHI nodes can create equivalences too.  */
>   record_equivalences_from_phis (bb);
>
> +  /* Create equivalences from redundant PHIs.  */
> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    eliminate_redundant_computations (&gsi);
> +
>   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>     optimize_stmt (bb, gsi);
>
> @@ -1818,13 +1877,27 @@ eliminate_redundant_computations (gimple_stmt_iter
>  {
>   tree expr_type;
>   tree cached_lhs;
> +  tree def;
>   bool insert = true;
>   bool assigns_var_p = false;
> +  size_t i;
>
>   gimple stmt = gsi_stmt (*gsi);
>
> -  tree def = gimple_get_lhs (stmt);
> +  /* If this is a PHI, we only want to consider it if a

Re: [PATCH, PR 50622] Force a gimple operand in load_assign_lhs_subreplacements when necessary

2011-12-02 Thread Richard Guenther
On Thu, 1 Dec 2011, Martin Jambor wrote:

> Hi,
> 
> PR 50622 is an omission in load_assign_lhs_subreplacements, which
> should force a gimple operand on a RHS of a gimple assignment if both
> sides are new replacements of scalar types which are not gimple
> registers, because they are partially modified (which can happen to
> complex numbers and bit-fields).
> 
> Fixed with the patch below.  It passes bootstrap and testsuite on
> x86_64-linux, I am about to do the same on the 4.6 branch because I'd
> like to commit it there as well.  OK for trunk and the 4.6 branch?

Ok for both.

Thanks,
Richard.

> Thanks,
> 
> Martin
> 
> 
> 2011-12-01  Martin Jambor  
> 
>   PR tree-optimization/50622
>   * tree-sra.c (load_assign_lhs_subreplacements): Force gimple operand
>   if both lacc and racc are grp_partial_lhs.
> 
>   * testsuite/g++.dg/tree-ssa/pr50622.C: New test.
> 
> Index: src/gcc/tree-sra.c
> ===
> --- src.orig/gcc/tree-sra.c
> +++ src/gcc/tree-sra.c
> @@ -2692,6 +2692,10 @@ load_assign_lhs_subreplacements (struct
> rhs = get_access_replacement (racc);
> if (!useless_type_conversion_p (lacc->type, racc->type))
>   rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR, lacc->type, rhs);
> +
> +   if (racc->grp_partial_lhs && lacc->grp_partial_lhs)
> + rhs = force_gimple_operand_gsi (old_gsi, rhs, true, NULL_TREE,
> + true, GSI_SAME_STMT);
>   }
> else
>   {
> Index: src/gcc/testsuite/g++.dg/tree-ssa/pr50622.C
> ===
> --- /dev/null
> +++ src/gcc/testsuite/g++.dg/tree-ssa/pr50622.C
> @@ -0,0 +1,30 @@
> +// { dg-do compile }
> +// { dg-options "-O2" }
> +
> +typedef __complex__ double Value;
> +struct LorentzVector
> +{
> +  LorentzVector & operator+=(const LorentzVector & a) {
> +theX += a.theX;
> +theY += a.theY;
> +theZ += a.theZ;
> +theT += a.theT;
> +return *this;
> +  }
> +
> +  Value theX;
> +  Value theY;
> +  Value theZ;
> +  Value theT;
> +};
> +
> +inline LorentzVector
> +operator+(LorentzVector a, const LorentzVector & b) {
> +  return a += b;
> +}
> +
> +Value ex, et;
> +LorentzVector sum() {
> +  LorentzVector v1; v1.theX =ex; v1.theY =ex+et; v1.theZ =ex-et;   v1.theT 
> =et;
> +  return v1+v1;
> +}
> 
> 

-- 
Richard Guenther 
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer

Re: [Patch, i386] Limit unroll factor for certain loops on Corei7

2011-12-02 Thread Andreas Krebbel
On Thu, Dec 01, 2011 at 11:39:36PM -0800, Teresa Johnson wrote:
> To do this I leveraged the existing TARGET_LOOP_UNROLL_ADJUST target
> hook, which was previously only defined for s390. I added one
> additional call to this target hook, when unrolling for constant trip
> count loops. Previously it was only called for runtime computed trip
> counts. Andreas, can you comment on the effect for s390 of this
> additional call of the target hook, since I can't measure that?

Limiting the unrolling of loops with constant iterations also makes
sense for s390.  However, the limitations are only relevant if it
actually stays a loop. If the loop gets completely peeled into a
sequential instruction stream there should be no limitation. But as I
understand it this will be done by different code paths.

So I think the change should be ok for s390 as well. It will take some
time to get measurements on that. I'll try to keep that in mind until
then.

Bye,

-Andreas-