Copilot commented on code in PR #243: URL: https://github.com/apache/uima-uimaj/pull/243#discussion_r3298036767
########## uimaj-core/src/test/java/org/apache/uima/jcas/impl/LoadingBuiltinAnnotationBeforeCasTest.java: ########## @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.uima.jcas.impl; + +import static java.lang.System.getProperty; +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.ByteArrayOutputStream; +import java.io.File; + +import org.apache.uima.jcas.tcas.Annotation; +import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl; +import org.apache.uima.util.CasCreationUtils; +import org.junit.jupiter.api.Test; + +/** + * Reproduces <a href="https://github.com/apache/uima-uimaj/issues/234">issue #234</a>: when a + * built-in Annotation cover class is touched before any CAS exists, the type system records a wrong + * (zero) jcasType for it and Annotation's feature callsites are never updated. Subsequent + * {@code getCasType} / {@code setBegin} calls then fail. + * + * <p> + * The bug only manifests in a fresh JVM where nothing has yet initialized {@link Annotation}. Class + * initialization is once-per-JVM, so running this in the shared Surefire JVM (where other tests + * have already created CASes and thereby initialized Annotation normally) would not reproduce it. + * The test therefore forks a dedicated JVM running {@link #main(String[])} and asserts on its exit + * code. + */ +public class LoadingBuiltinAnnotationBeforeCasTest { + + /** + * Reproducer body. Must run in a fresh JVM (see the {@code @Test} below). Throws on failure so + * the JVM exits non-zero. + */ + public static void main(String[] args) throws Exception { + // Force Annotation's <clinit> to start now, before any TypeSystemImpl / CAS exists. + // Annotation's super (AnnotationBase) references TypeSystemImpl in its own <clinit>, which + // kicks off a recursive init storm that reads Annotation.typeIndexID while it's still 0 -- + // caching a broken JCasClassInfo and skipping Annotation's _FC_begin / _FC_end callsite update. + Class.forName(Annotation.class.getName()); + + var tsd = new TypeSystemDescription_impl(); + var jcas = CasCreationUtils.createCas(tsd, null, null).getJCas(); + + // (1) Type system lookup by JCas type id. With the bug, slot Annotation.type of + // jcasRegisteredTypes is empty (the JCCI got registered at slot 0 instead). + jcas.getCasType(Annotation.type); + + // (2) setDocumentText eventually calls Annotation.setBegin, which dispatches through the + // _FC_begin callsite. With the bug, that callsite still returns -1 and the offset lookup + // throws ArrayIndexOutOfBoundsException. + jcas.setDocumentText("hello"); + } + + @Test + void thatLoadingAnnotationBeforeCasDoesNotBreakTypeSystemManagement() throws Exception { + var windows = getProperty("os.name").toLowerCase().contains("win"); + var java = getProperty("java.home") + File.separator + "bin" + File.separator + + (windows ? "java.exe" : "java"); + + var pb = new ProcessBuilder(java, "-cp", getProperty("java.class.path"), getClass().getName()); + pb.redirectErrorStream(true); + var p = pb.start(); + + var buf = new ByteArrayOutputStream(); + p.getInputStream().transferTo(buf); + int exit = p.waitFor(); + Review Comment: The test waits for the forked JVM using `p.waitFor()` with no timeout. If the subprocess deadlocks or hangs (which is plausible for class-init regression tests), the build can hang indefinitely. Consider using `waitFor(timeout, unit)` and failing the test if the subprocess does not exit within a reasonable time, optionally destroying the process. ########## uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java: ########## @@ -2867,11 +2865,34 @@ public List<TypeImpl> getAllTypes() { } public static final TypeSystemImpl staticTsi = new TypeSystemImpl(); - static { - TypeSystemImpl tsi = staticTsi.commit(); // needed to assign adjusted offsets to the builtins - if (tsi != staticTsi) { - Misc.internalError(); + + // staticTsi was previously committed inside this class's <clinit>. That triggered a recursive + // load-cover-classes storm: a built-in JCas cover class's <clinit> could be the very thing that + // caused TypeSystemImpl to load (e.g. via createCallSiteForBuiltIn). The commit then asked the + // JVM to fully initialize that same cover class while it was still mid-init on the current + // thread, producing a zero/default typeIndexID and broken callsites (issue #234). Commit is now + // deferred until first real demand via committedStaticTsi(). + private static volatile boolean staticTsiCommitted; + + /** + * Returns {@link #staticTsi} after ensuring it has been committed. Commit is performed lazily and + * exactly once across all threads. Code that needs commit-dependent state on staticTsi (e.g. + * {@link TypeImpl#getAdjOffset(String)}) must go through this accessor rather than + * {@link #staticTsi} directly. + */ + public static TypeSystemImpl committedStaticTsi() { + if (!staticTsiCommitted) { + synchronized (TypeSystemImpl.class) { + if (!staticTsiCommitted) { + TypeSystemImpl tsi = staticTsi.commit(); + if (tsi != staticTsi) { + Misc.internalError(); + } + staticTsiCommitted = true; + } + } } + return staticTsi; Review Comment: `committedStaticTsi()` assumes `staticTsi.commit()` always returns `staticTsi` and calls `Misc.internalError()` otherwise. With commit now deferred, it becomes possible for another equal `TypeSystemImpl` instance (e.g. created via `CASFactory.createTypeSystem()` and committed first) to enter the `committedTypeSystems` consolidation map before `staticTsi` is committed, in which case `staticTsi.commit()` may legally return the already-committed consolidated instance (not `staticTsi`) and this code will hard-fail. Consider adjusting this accessor to tolerate consolidation (e.g. store and return the actual committed instance from `staticTsi.commit()`), or otherwise ensure `staticTsi` is committed before any other equivalent type system can be committed. ########## uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java: ########## @@ -2130,9 +2130,7 @@ public int getFeatureOffset(TypeImpl ti, String featureShortName) { // TypeInfo getTypeInfo(int typeCode) { // if (null == typeInfoArray[typeCode]) { // TypeImpl type = (TypeImpl) ll_getTypeForCode(typeCode); - //// if (null == type) { - //// diagnoseBadTypeCode(typeCode); - //// } + //// if (null == type) { / diagnoseBadTypeCode(typeCode); / } Review Comment: This commented-out line was reformatted into a single line with inline `/ ... /` markers: `//// if (null == type) { / diagnoseBadTypeCode(typeCode); / }`. As written, it’s difficult to read and looks like accidental corruption of the original commented code. Consider restoring the original multi-line commented block (or removing it entirely if it’s no longer needed) to keep the history readable. ########## uimaj-core/src/test/java/org/apache/uima/jcas/impl/LoadingBuiltinAnnotationBeforeCasTest.java: ########## @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.uima.jcas.impl; + +import static java.lang.System.getProperty; +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.ByteArrayOutputStream; +import java.io.File; + +import org.apache.uima.jcas.tcas.Annotation; +import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl; +import org.apache.uima.util.CasCreationUtils; +import org.junit.jupiter.api.Test; + +/** + * Reproduces <a href="https://github.com/apache/uima-uimaj/issues/234">issue #234</a>: when a + * built-in Annotation cover class is touched before any CAS exists, the type system records a wrong + * (zero) jcasType for it and Annotation's feature callsites are never updated. Subsequent + * {@code getCasType} / {@code setBegin} calls then fail. + * + * <p> + * The bug only manifests in a fresh JVM where nothing has yet initialized {@link Annotation}. Class + * initialization is once-per-JVM, so running this in the shared Surefire JVM (where other tests + * have already created CASes and thereby initialized Annotation normally) would not reproduce it. + * The test therefore forks a dedicated JVM running {@link #main(String[])} and asserts on its exit + * code. + */ +public class LoadingBuiltinAnnotationBeforeCasTest { + + /** + * Reproducer body. Must run in a fresh JVM (see the {@code @Test} below). Throws on failure so + * the JVM exits non-zero. + */ + public static void main(String[] args) throws Exception { + // Force Annotation's <clinit> to start now, before any TypeSystemImpl / CAS exists. + // Annotation's super (AnnotationBase) references TypeSystemImpl in its own <clinit>, which + // kicks off a recursive init storm that reads Annotation.typeIndexID while it's still 0 -- + // caching a broken JCasClassInfo and skipping Annotation's _FC_begin / _FC_end callsite update. + Class.forName(Annotation.class.getName()); + + var tsd = new TypeSystemDescription_impl(); + var jcas = CasCreationUtils.createCas(tsd, null, null).getJCas(); + + // (1) Type system lookup by JCas type id. With the bug, slot Annotation.type of + // jcasRegisteredTypes is empty (the JCCI got registered at slot 0 instead). + jcas.getCasType(Annotation.type); + + // (2) setDocumentText eventually calls Annotation.setBegin, which dispatches through the + // _FC_begin callsite. With the bug, that callsite still returns -1 and the offset lookup + // throws ArrayIndexOutOfBoundsException. + jcas.setDocumentText("hello"); + } + + @Test + void thatLoadingAnnotationBeforeCasDoesNotBreakTypeSystemManagement() throws Exception { + var windows = getProperty("os.name").toLowerCase().contains("win"); + var java = getProperty("java.home") + File.separator + "bin" + File.separator + + (windows ? "java.exe" : "java"); + + var pb = new ProcessBuilder(java, "-cp", getProperty("java.class.path"), getClass().getName()); + pb.redirectErrorStream(true); + var p = pb.start(); + + var buf = new ByteArrayOutputStream(); + p.getInputStream().transferTo(buf); + int exit = p.waitFor(); + + assertThat(exit).as("forked JVM exited non-zero; subprocess output was:%n%s", buf.toString()) + .isZero(); Review Comment: `buf.toString()` uses the platform default charset, which can make the failure message inconsistent across environments. Consider decoding the subprocess output with an explicit charset (e.g. UTF-8) when formatting the assertion message. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
