Can we add license headers to TestIdentityHashSet and TestRamUsageEstimatorOnWildAnimals? :) For anyone skimming, a few illustrative sketches of the new estimator API, its traversal, and its size arithmetic follow below the quoted commit.
On Fri, Mar 23, 2012 at 1:05 PM, <[email protected]> wrote: > Author: uschindler > Date: Fri Mar 23 17:05:58 2012 > New Revision: 1304485 > > URL: http://svn.apache.org/viewvc?rev=1304485&view=rev > Log: > LUCENE-3867: Fix incorrect short-circuit, improve memory usage. > > Added: > > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java > (with props) > > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java > (with props) > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java > > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java?rev=1304485&r1=1304484&r2=1304485&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java > Fri Mar 23 17:05:58 2012 > @@ -27,6 +27,11 @@ import org.apache.lucene.LucenePackage; > public final class Constants { > private Constants() {} // can't construct > > + /** JVM vendor info. */ > + public static final String JVM_VENDOR = > System.getProperty("java.vm.vendor"); > + public static final String JVM_VERSION = > System.getProperty("java.vm.version"); > + public static final String JVM_NAME = System.getProperty("java.vm.name"); > + > /** The value of <tt>System.getProperty("java.version")<tt>. **/ > public static final String JAVA_VERSION = > System.getProperty("java.version"); > > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java?rev=1304485&r1=1304484&r2=1304485&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java > Fri Mar 23 17:05:58 2012 > @@ -24,14 +24,50 @@ import java.text.DecimalFormatSymbols; > import java.util.*; > > /** > - * Estimates the size of Java objects using a simple memory model > - * for primitive size information. > + * Estimates the size (memory representation) of Java objects. > + * > + * @see #sizeOf(Object) > + * @see #shallowSizeOf(Object) > + * @see #shallowSizeOfInstance(Class) > * > * @lucene.internal > */ > public final class RamUsageEstimator { > + /** > + * JVM diagnostic features. 
> + */ > + public static enum JvmFeature { > + OBJECT_REFERENCE_SIZE("Object reference size estimated using array index > scale."), > + ARRAY_HEADER_SIZE("Array header size estimated using array based > offset."), > + FIELD_OFFSETS("Shallow instance size based on field offsets."), > + OBJECT_ALIGNMENT("Object alignment retrieved from HotSpotDiagnostic MX > bean."); > + > + public final String description; > + > + private JvmFeature(String description) { > + this.description = description; > + } > + > + @Override > + public String toString() { > + return super.name() + " (" + description + ")"; > + } > + } > + > + /** JVM info string for debugging and reports. */ > + public final static String JVM_INFO_STRING; > + > + /** One kilobyte bytes. */ > + public static final long ONE_KB = 1024; > > - private RamUsageEstimator() {} // no instance > + /** One megabyte bytes. */ > + public static final long ONE_MB = ONE_KB * ONE_KB; > + > + /** One gigabyte bytes.*/ > + public static final long ONE_GB = ONE_KB * ONE_MB; > + > + /** No instantiation. */ > + private RamUsageEstimator() {} > > public final static int NUM_BYTES_BOOLEAN = 1; > public final static int NUM_BYTES_BYTE = 1; > @@ -42,9 +78,19 @@ public final class RamUsageEstimator { > public final static int NUM_BYTES_LONG = 8; > public final static int NUM_BYTES_DOUBLE = 8; > > + /** > + * Number of bytes this jvm uses to represent an object reference. > + */ > public final static int NUM_BYTES_OBJECT_REF; > - > + > + /** > + * Number of bytes to represent an object header (no fields, no > alignments). > + */ > public final static int NUM_BYTES_OBJECT_HEADER; > + > + /** > + * Number of bytes to represent an array header (no content, but with > alignments). > + */ > public final static int NUM_BYTES_ARRAY_HEADER; > > /** > @@ -69,9 +115,20 @@ public final class RamUsageEstimator { > primitiveSizes.put(long.class, Integer.valueOf(NUM_BYTES_LONG)); > } > > + /** > + * A handle to <code>sun.misc.Unsafe</code>. > + */ > private final static Object theUnsafe; > + > + /** > + * A handle to <code>sun.misc.Unsafe#fieldOffset(Field)</code>. > + */ > private final static Method objectFieldOffsetMethod; > - private final static boolean useUnsafe, isSupportedJVM; > + > + /** > + * All the supported "internal" JVM features detected at clinit. > + */ > + private final static EnumSet<JvmFeature> supportedFeatures; > > /** > * Initialize constants and try to collect information about the JVM > internals. > @@ -85,80 +142,71 @@ public final class RamUsageEstimator { > // so on 64 bit JVMs it'll be align(16 + 4, @8) = 24. > int arrayHeader = Constants.JRE_IS_64BIT ? 
24 : 12; > > - Object unsafe = null; > - Method objectFieldOffsetM = null; > - boolean supportedJvm = true; > + supportedFeatures = EnumSet.noneOf(JvmFeature.class); > + > + Class<?> unsafeClass = null; > + Object tempTheUnsafe = null; > try { > - final Class<?> unsafeClass = Class.forName("sun.misc.Unsafe"); > + unsafeClass = Class.forName("sun.misc.Unsafe"); > final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe"); > unsafeField.setAccessible(true); > - unsafe = unsafeField.get(null); > - > - // get object reference size by getting scale factor of Object[] > arrays: > - try { > - final Method arrayIndexScaleM = > unsafeClass.getMethod("arrayIndexScale", Class.class); > - referenceSize = ((Number) arrayIndexScaleM.invoke(unsafe, > Object[].class)).intValue(); > - } catch (Exception e) { > - // ignore > - supportedJvm = false; > - } > - > - // updated best guess based on reference size: > - objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8; > - arrayHeader = Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) : 12; > - > - // get the object header size: > - // - first try out if the field offsets are not scaled (see warning in > Unsafe docs) > - // - get the object header size by getting the field offset of the > first field of a dummy object > - // If the scaling is byte-wise and unsafe is available, enable dynamic > size measurement for > - // estimateRamUsage(). > - try { > - objectFieldOffsetM = unsafeClass.getMethod("objectFieldOffset", > Field.class); > - final Field dummy1Field = > DummyTwoLongObject.class.getDeclaredField("dummy1"); > - final int ofs1 = ((Number) objectFieldOffsetM.invoke(unsafe, > dummy1Field)).intValue(); > - final Field dummy2Field = > DummyTwoLongObject.class.getDeclaredField("dummy2"); > - final int ofs2 = ((Number) objectFieldOffsetM.invoke(unsafe, > dummy2Field)).intValue(); > - if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) { > - final Field baseField = > DummyOneFieldObject.class.getDeclaredField("base"); > - objectHeader = ((Number) objectFieldOffsetM.invoke(unsafe, > baseField)).intValue(); > - } else { > - // it is not safe to use Unsafe.objectFieldOffset(), > - // as it may be scaled (see "cookie" comment in Unsafe), better > use defaults > - // and conventional size estimation: > - objectFieldOffsetM = null; > - supportedJvm = false; > - } > - } catch (Exception e) { > - // on exception ensure useUnsafe will be set to false later: > - objectFieldOffsetM = null; > - supportedJvm = false; > - } > + tempTheUnsafe = unsafeField.get(null); > + } catch (Exception e) { > + // Ignore. > + } > + theUnsafe = tempTheUnsafe; > > - // Get the array header size by retrieving the array base offset > - // (offset of the first element of an array). > - try { > - final Method arrayBaseOffsetM = > unsafeClass.getMethod("arrayBaseOffset", Class.class); > - // we calculate that only for byte[] arrays, it's actually the same > for all types: > - arrayHeader = ((Number) arrayBaseOffsetM.invoke(unsafe, > byte[].class)).intValue(); > - } catch (Exception e) { > - // ignore > - supportedJvm = false; > + // get object reference size by getting scale factor of Object[] arrays: > + try { > + final Method arrayIndexScaleM = > unsafeClass.getMethod("arrayIndexScale", Class.class); > + referenceSize = ((Number) arrayIndexScaleM.invoke(theUnsafe, > Object[].class)).intValue(); > + supportedFeatures.add(JvmFeature.OBJECT_REFERENCE_SIZE); > + } catch (Exception e) { > + // ignore. > + } > + > + // "best guess" based on reference size. 
We will attempt to modify > + // these to exact values if there is supported infrastructure. > + objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8; > + arrayHeader = Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) : 12; > + > + // get the object header size: > + // - first try out if the field offsets are not scaled (see warning in > Unsafe docs) > + // - get the object header size by getting the field offset of the first > field of a dummy object > + // If the scaling is byte-wise and unsafe is available, enable dynamic > size measurement for > + // estimateRamUsage(). > + Method tempObjectFieldOffsetMethod = null; > + try { > + final Method objectFieldOffsetM = > unsafeClass.getMethod("objectFieldOffset", Field.class); > + final Field dummy1Field = > DummyTwoLongObject.class.getDeclaredField("dummy1"); > + final int ofs1 = ((Number) objectFieldOffsetM.invoke(theUnsafe, > dummy1Field)).intValue(); > + final Field dummy2Field = > DummyTwoLongObject.class.getDeclaredField("dummy2"); > + final int ofs2 = ((Number) objectFieldOffsetM.invoke(theUnsafe, > dummy2Field)).intValue(); > + if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) { > + final Field baseField = > DummyOneFieldObject.class.getDeclaredField("base"); > + objectHeader = ((Number) objectFieldOffsetM.invoke(theUnsafe, > baseField)).intValue(); > + supportedFeatures.add(JvmFeature.FIELD_OFFSETS); > + tempObjectFieldOffsetMethod = objectFieldOffsetM; > } > } catch (Exception e) { > - // ignore > - supportedJvm = false; > + // Ignore. > + } > + objectFieldOffsetMethod = tempObjectFieldOffsetMethod; > + > + // Get the array header size by retrieving the array base offset > + // (offset of the first element of an array). > + try { > + final Method arrayBaseOffsetM = > unsafeClass.getMethod("arrayBaseOffset", Class.class); > + // we calculate that only for byte[] arrays, it's actually the same > for all types: > + arrayHeader = ((Number) arrayBaseOffsetM.invoke(theUnsafe, > byte[].class)).intValue(); > + supportedFeatures.add(JvmFeature.ARRAY_HEADER_SIZE); > + } catch (Exception e) { > + // Ignore. > } > > NUM_BYTES_OBJECT_REF = referenceSize; > NUM_BYTES_OBJECT_HEADER = objectHeader; > NUM_BYTES_ARRAY_HEADER = arrayHeader; > - useUnsafe = (unsafe != null && objectFieldOffsetM != null); > - if (useUnsafe) { > - theUnsafe = unsafe; > - objectFieldOffsetMethod = objectFieldOffsetM; > - } else { > - theUnsafe = objectFieldOffsetMethod = null; > - } > > // Try to get the object alignment (the default seems to be 8 on Hotspot, > // regardless of the architecture). > @@ -176,18 +224,34 @@ public final class RamUsageEstimator { > objectAlignment = Integer.parseInt( > > vmOption.getClass().getMethod("getValue").invoke(vmOption).toString() > ); > + supportedFeatures.add(JvmFeature.OBJECT_ALIGNMENT); > } catch (InvocationTargetException ite) { > if (!(ite.getCause() instanceof IllegalArgumentException)) > throw ite; > // ignore the error completely and use default of 8 (32 bit JVMs). > } > } catch (Exception e) { > - // ignore > - supportedJvm = false; > + // Ignore. > } > + > NUM_BYTES_OBJECT_ALIGNMENT = objectAlignment; > > - isSupportedJVM = supportedJvm; > + JVM_INFO_STRING = "[JVM: " + > + Constants.JVM_NAME + ", " + Constants.JVM_VERSION + ", " + > Constants.JVM_VENDOR + ", " + > + Constants.JAVA_VENDOR + ", " + Constants.JAVA_VERSION + "]"; > + } > + > + /** > + * Cached information about a given class. 
> + */ > + private static final class ClassCache { > + public final long alignedShallowInstanceSize; > + public final Field[] referenceFields; > + > + public ClassCache(long alignedShallowInstanceSize, Field[] > referenceFields) { > + this.alignedShallowInstanceSize = alignedShallowInstanceSize; > + this.referenceFields = referenceFields; > + } > } > > // Object with just one field to determine the object header size by > getting the offset of the dummy field: > @@ -204,14 +268,14 @@ public final class RamUsageEstimator { > } > > /** > - * Returns true, if the current JVM is supported by {@code > RamUsageEstimator}. > + * Returns true, if the current JVM is fully supported by {@code > RamUsageEstimator}. > * If this method returns {@code false} you are maybe using a 3rd party > Java VM > * that is not supporting Oracle/Sun private APIs. The memory estimates can > be > * imprecise then (no way of detecting compressed references, alignments, > etc.). > * Lucene still tries to use sensible defaults. > */ > public static boolean isSupportedJVM() { > - return isSupportedJVM; > + return supportedFeatures.size() == JvmFeature.values().length; > } > > /** > @@ -272,13 +336,7 @@ public final class RamUsageEstimator { > * should be GCed.</p> > */ > public static long sizeOf(Object obj) { > - final Set<Object> seen = Collections.newSetFromMap(new > IdentityHashMap<Object,Boolean>(64)); > - try { > - return measureObjectSize(obj, seen); > - } finally { > - // Help the GC. > - seen.clear(); > - } > + return measureObjectSize(obj); > } > > /** > @@ -292,7 +350,7 @@ public final class RamUsageEstimator { > if (obj == null) return 0; > final Class<?> clz = obj.getClass(); > if (clz.isArray()) { > - return measureArraySize(obj, null); > + return shallowSizeOfArray(obj); > } else { > return shallowSizeOfInstance(clz); > } > @@ -302,8 +360,8 @@ public final class RamUsageEstimator { > * Returns the shallow instance size in bytes an instance of the given > class would occupy. > * This works with all conventional classes and primitive types, but not > with arrays > * (the size then depends on the number of elements and varies from object > to object). > - * Use the array-instance methods instead. > * > + * @see #shallowSizeOf(Object) > * @throws IllegalArgumentException if {@code clazz} is an array class. > */ > public static long shallowSizeOfInstance(Class<?> clazz) { > @@ -313,87 +371,163 @@ public final class RamUsageEstimator { > return primitiveSizes.get(clazz); > > long size = NUM_BYTES_OBJECT_HEADER; > - > + > // Walk type hierarchy > - while (clazz != null) { > + for (;clazz != null; clazz = clazz.getSuperclass()) { > final Field[] fields = clazz.getDeclaredFields(); > - boolean fieldFound = false; > - for (final Field f : fields) { > - if (Modifier.isStatic(f.getModifiers())) { > - continue; > + for (Field f : fields) { > + if (!Modifier.isStatic(f.getModifiers())) { > + size = adjustForField(size, f); > } > - > - size = reflectFieldSize(size, f); > - fieldFound = true; > } > - if (useUnsafe && fieldFound) { > - // no need to recurse to superclasses, as all fields are > - // added at the end, so we won't find any larger offset > - break; > - } > - clazz = clazz.getSuperclass(); > } > return alignObjectSize(size); > } > > /** > - * Recursive descend into an object. > + * Return shallow size of any <code>array</code>. 
> */ > - private static long measureObjectSize(Object obj, Set<Object> seen) { > - if (obj == null) { > - return 0; > + private static long shallowSizeOfArray(Object array) { > + long size = NUM_BYTES_ARRAY_HEADER; > + final int len = Array.getLength(array); > + if (len > 0) { > + Class<?> arrayElementClazz = array.getClass().getComponentType(); > + if (arrayElementClazz.isPrimitive()) { > + size += (long) len * primitiveSizes.get(arrayElementClazz); > + } else { > + size += (long) NUM_BYTES_OBJECT_REF * len; > + } > } > + return alignObjectSize(size); > + } > > - // skip if we have seen before > - if (seen.contains(obj)) { > - return 0; > - } > + /* > + * Non-recursive version of object descend. This consumes more memory than > recursive in-depth > + * traversal but prevents stack overflows on long chains of objects > + * or complex graphs (a max. recursion depth on my machine was ~5000 > objects linked in a chain > + * so not too much). > + */ > + private static long measureObjectSize(Object root) { > + // Objects seen so far. > + final IdentityHashSet<Object> seen = new IdentityHashSet<Object>(); > + // Class cache with reference Field and precalculated shallow size. > + final IdentityHashMap<Class<?>, ClassCache> classCache = new > IdentityHashMap<Class<?>, ClassCache>(); > + // Stack of objects pending traversal. Recursion caused stack overflows. > + final ArrayList<Object> stack = new ArrayList<Object>(); > + stack.add(root); > + > + long totalSize = 0; > + while (!stack.isEmpty()) { > + final Object ob = stack.remove(stack.size() - 1); > > - // add to seen > - seen.add(obj); > + if (ob == null || seen.contains(ob)) { > + continue; > + } > + seen.add(ob); > > - Class<?> clazz = obj.getClass(); > - if (clazz.isArray()) { > - return measureArraySize(obj, seen); > + final Class<?> obClazz = ob.getClass(); > + if (obClazz.isArray()) { > + /* > + * Consider an array, possibly of primitive types. Push any of its > references to > + * the processing stack and accumulate this array's shallow size. > + */ > + long size = NUM_BYTES_ARRAY_HEADER; > + final int len = Array.getLength(ob); > + if (len > 0) { > + Class<?> componentClazz = obClazz.getComponentType(); > + if (componentClazz.isPrimitive()) { > + size += (long) len * primitiveSizes.get(componentClazz); > + } else { > + size += (long) NUM_BYTES_OBJECT_REF * len; > + > + // Push refs for traversal later. > + for (int i = len; --i >= 0 ;) { > + final Object o = Array.get(ob, i); > + if (o != null && !seen.contains(o)) { > + stack.add(o); > + } > + } > + } > + } > + totalSize += alignObjectSize(size); > + } else { > + /* > + * Consider an object. Push any references it has to the processing > stack > + * and accumulate this object's shallow size. > + */ > + try { > + ClassCache cachedInfo = classCache.get(obClazz); > + if (cachedInfo == null) { > + classCache.put(obClazz, cachedInfo = createCacheEntry(obClazz)); > + } > + > + for (Field f : cachedInfo.referenceFields) { > + // Fast path to eliminate redundancies. > + final Object o = f.get(ob); > + if (o != null && !seen.contains(o)) { > + stack.add(o); > + } > + } > + > + totalSize += cachedInfo.alignedShallowInstanceSize; > + } catch (IllegalAccessException e) { > + // this should never happen as we enabled setAccessible(). > + throw new RuntimeException("Reflective field access failed?", e); > + } > + } > } > > - long size = NUM_BYTES_OBJECT_HEADER; > - long innerSize = 0L; > + // Help the GC (?). 
> + seen.clear(); > + stack.clear(); > + classCache.clear(); > > - // walk type hierarchy > - while (clazz != null) { > - final Field[] fields = clazz.getDeclaredFields(); > + return totalSize; > + } > + > + /** > + * Create a cached information about shallow size and reference fields for > + * a given class. > + */ > + private static ClassCache createCacheEntry(final Class<?> clazz) { > + ClassCache cachedInfo; > + long shallowInstanceSize = NUM_BYTES_OBJECT_HEADER; > + final ArrayList<Field> referenceFields = new ArrayList<Field>(32); > + for (Class<?> c = clazz; c != null; c = c.getSuperclass()) { > + final Field[] fields = c.getDeclaredFields(); > for (final Field f : fields) { > - if (Modifier.isStatic(f.getModifiers())) { > - continue; > - } > + if (!Modifier.isStatic(f.getModifiers())) { > + shallowInstanceSize = adjustForField(shallowInstanceSize, f); > > - size = reflectFieldSize(size, f); > - > - if (!f.getType().isPrimitive()) { > - try { > + if (!f.getType().isPrimitive()) { > f.setAccessible(true); > - innerSize += measureObjectSize(f.get(obj), seen); > - } catch (IllegalAccessException ex) { > - // this should never happen as we enable setAccessible()! > - throw new RuntimeException("Cannot reflect instance field: " + > - f.getDeclaringClass().getName() + "#" + f.getName(), ex); > + referenceFields.add(f); > } > } > } > - clazz = clazz.getSuperclass(); > } > - return alignObjectSize(size) + innerSize; > + > + cachedInfo = new ClassCache( > + alignObjectSize(shallowInstanceSize), > + referenceFields.toArray(new Field[referenceFields.size()])); > + return cachedInfo; > } > - > - private static long reflectFieldSize(long size, final Field f) { > + > + /** > + * This method returns the maximum representation size of an object. > <code>sizeSoFar</code> > + * is the object's size measured so far. <code>f</code> is the field being > probed. > + * > + * <p>The returned offset will be the maximum of whatever was measured so > far and > + * <code>f</code> field's offset and representation size (unaligned). > + */ > + private static long adjustForField(long sizeSoFar, final Field f) { > final Class<?> type = f.getType(); > final int fsize = type.isPrimitive() ? primitiveSizes.get(type) : > NUM_BYTES_OBJECT_REF; > - if (useUnsafe) { > + if (objectFieldOffsetMethod != null) { > try { > final long offsetPlusSize = > ((Number) objectFieldOffsetMethod.invoke(theUnsafe, f)).longValue() > + fsize; > - return Math.max(size, offsetPlusSize); > + return Math.max(sizeSoFar, offsetPlusSize); > } catch (IllegalAccessException ex) { > throw new RuntimeException("Access problem with sun.misc.Unsafe", ex); > } catch (InvocationTargetException ite) { > @@ -409,40 +543,22 @@ public final class RamUsageEstimator { > f.getDeclaringClass().getName() + "#" + f.getName(), cause); > } > } else { > - return size + fsize; > + // TODO: No alignments based on field type/ subclass fields alignments? > + return sizeSoFar + fsize; > } > } > > - /** > - * Return the deep size of an <code>array</code>, including > - * sub-objects if there are any. > - * > - * @param seen A set of already seen objects. If <code>null</code> no > references > - * are followed and this method returns shallow size. 
> - */ > - private static long measureArraySize(Object array, Set<Object> seen) { > - long size = NUM_BYTES_ARRAY_HEADER; > - final int len = Array.getLength(array); > - if (len > 0) { > - Class<?> arrayElementClazz = array.getClass().getComponentType(); > - if (arrayElementClazz.isPrimitive()) { > - size += (long) len * primitiveSizes.get(arrayElementClazz); > - } else { > - size += (long) NUM_BYTES_OBJECT_REF * len; > - if (seen != null) { > - for (int i = 0; i < len; i++) { > - size += measureObjectSize(Array.get(array, i), seen); > - } > - } > - } > - } > - > - return alignObjectSize(size); > + /** Return the set of unsupported JVM features that improve the > estimation. */ > + public static EnumSet<JvmFeature> getUnsupportedFeatures() { > + EnumSet<JvmFeature> unsupported = EnumSet.allOf(JvmFeature.class); > + unsupported.removeAll(supportedFeatures); > + return unsupported; > } > > - public static final long ONE_KB = 1024; > - public static final long ONE_MB = ONE_KB * ONE_KB; > - public static final long ONE_GB = ONE_KB * ONE_MB; > + /** Return the set of supported JVM features that improve the estimation. > */ > + public static EnumSet<JvmFeature> getSupportedFeatures() { > + return EnumSet.copyOf(supportedFeatures); > + } > > /** > * Returns <code>size</code> in human-readable units (GB, MB, KB or bytes). > @@ -466,4 +582,252 @@ public final class RamUsageEstimator { > return bytes + " bytes"; > } > } > + > + /** > + * Return a human-readable size of a given object. > + * @see #sizeOf(Object) > + * @see #humanReadableUnits(long) > + */ > + public static String humanSizeOf(Object object) { > + return humanReadableUnits(sizeOf(object)); > + } > + > + /** > + * An identity hash set implemented using open addressing. No null keys > are allowed. > + * > + * TODO: If this is useful outside this class, make it public - needs some > work > + */ > + static final class IdentityHashSet<KType> implements Iterable<KType> { > + /** > + * Default load factor. > + */ > + public final static float DEFAULT_LOAD_FACTOR = 0.75f; > + > + /** > + * Minimum capacity for the set. > + */ > + public final static int MIN_CAPACITY = 4; > + > + /** > + * All of set entries. Always of power of two length. > + */ > + public Object[] keys; > + > + /** > + * Cached number of assigned slots. > + */ > + public int assigned; > + > + /** > + * The load factor for this set (fraction of allocated or deleted slots > before > + * the buffers must be rehashed or reallocated). > + */ > + public final float loadFactor; > + > + /** > + * Cached capacity threshold at which we must resize the buffers. > + */ > + private int resizeThreshold; > + > + /** > + * Creates a hash set with the default capacity of 16. > + * load factor of {@value #DEFAULT_LOAD_FACTOR}. ` > + */ > + public IdentityHashSet() { > + this(16, DEFAULT_LOAD_FACTOR); > + } > + > + /** > + * Creates a hash set with the given capacity, load factor of > + * {@value #DEFAULT_LOAD_FACTOR}. > + */ > + public IdentityHashSet(int initialCapacity) { > + this(initialCapacity, DEFAULT_LOAD_FACTOR); > + } > + > + /** > + * Creates a hash set with the given capacity and load factor. 
> + */ > + public IdentityHashSet(int initialCapacity, float loadFactor) { > + initialCapacity = Math.max(MIN_CAPACITY, initialCapacity); > + > + assert initialCapacity > 0 : "Initial capacity must be between (0, " > + + Integer.MAX_VALUE + "]."; > + assert loadFactor > 0 && loadFactor < 1 : "Load factor must be between > (0, 1)."; > + this.loadFactor = loadFactor; > + allocateBuffers(roundCapacity(initialCapacity)); > + } > + > + /** > + * Adds a reference to the set. Null keys are not allowed. > + */ > + public boolean add(KType e) { > + assert e != null : "Null keys not allowed."; > + > + if (assigned >= resizeThreshold) expandAndRehash(); > + > + final int mask = keys.length - 1; > + int slot = rehash(e) & mask; > + Object existing; > + while ((existing = keys[slot]) != null) { > + if (e == existing) { > + return false; // already found. > + } > + slot = (slot + 1) & mask; > + } > + assigned++; > + keys[slot] = e; > + return true; > + } > + > + /** > + * Checks if the set contains a given ref. > + */ > + public boolean contains(KType e) { > + final int mask = keys.length - 1; > + int slot = rehash(e) & mask; > + Object existing; > + while ((existing = keys[slot]) != null) { > + if (e == existing) { > + return true; > + } > + slot = (slot + 1) & mask; > + } > + return false; > + } > + > + /** Rehash via MurmurHash. > + * > + * <p>The implementation is based on the > + * finalization step from Austin Appleby's > + * <code>MurmurHash3</code>. > + * > + * @see "http://sites.google.com/site/murmurhash/" > + */ > + private static int rehash(Object o) { > + int k = System.identityHashCode(o); > + k ^= k >>> 16; > + k *= 0x85ebca6b; > + k ^= k >>> 13; > + k *= 0xc2b2ae35; > + k ^= k >>> 16; > + return k; > + } > + > + /** > + * Expand the internal storage buffers (capacity) or rehash current keys > and > + * values if there are a lot of deleted slots. > + */ > + private void expandAndRehash() { > + final Object[] oldKeys = this.keys; > + > + assert assigned >= resizeThreshold; > + allocateBuffers(nextCapacity(keys.length)); > + > + /* > + * Rehash all assigned slots from the old hash table. > + */ > + final int mask = keys.length - 1; > + for (int i = 0; i < oldKeys.length; i++) { > + final Object key = oldKeys[i]; > + if (key != null) { > + int slot = rehash(key) & mask; > + while (keys[slot] != null) { > + slot = (slot + 1) & mask; > + } > + keys[slot] = key; > + } > + } > + Arrays.fill(oldKeys, null); > + } > + > + /** > + * Allocate internal buffers for a given capacity. > + * > + * @param capacity > + * New capacity (must be a power of two). > + */ > + private void allocateBuffers(int capacity) { > + this.keys = new Object[capacity]; > + this.resizeThreshold = (int) (capacity * DEFAULT_LOAD_FACTOR); > + } > + > + /** > + * Return the next possible capacity, counting from the current buffers' > size. > + */ > + protected int nextCapacity(int current) { > + assert current > 0 && Long.bitCount(current) == 1 : "Capacity must be > a power of two."; > + assert ((current << 1) > 0) : "Maximum capacity exceeded (" > + + (0x80000000 >>> 1) + ")."; > + > + if (current < MIN_CAPACITY / 2) current = MIN_CAPACITY / 2; > + return current << 1; > + } > + > + /** > + * Round the capacity to the next allowed value. > + */ > + protected int roundCapacity(int requestedCapacity) { > + // Maximum positive integer that is a power of two. 
> + if (requestedCapacity > (0x80000000 >>> 1)) return (0x80000000 >>> 1); > + > + int capacity = MIN_CAPACITY; > + while (capacity < requestedCapacity) { > + capacity <<= 1; > + } > + > + return capacity; > + } > + > + public void clear() { > + assigned = 0; > + Arrays.fill(keys, null); > + } > + > + public int size() { > + return assigned; > + } > + > + public boolean isEmpty() { > + return size() == 0; > + } > + > + @Override > + public Iterator<KType> iterator() { > + return new Iterator<KType>() { > + int pos = -1; > + Object nextElement = fetchNext(); > + > + @Override > + public boolean hasNext() { > + return nextElement != null; > + } > + > + @SuppressWarnings("unchecked") > + @Override > + public KType next() { > + Object r = this.nextElement; > + if (r == null) { > + throw new NoSuchElementException(); > + } > + this.nextElement = fetchNext(); > + return (KType) r; > + } > + > + private Object fetchNext() { > + pos++; > + while (pos < keys.length && keys[pos] == null) { > + pos++; > + } > + > + return (pos >= keys.length ? null : keys[pos]); > + } > + > + @Override > + public void remove() { > + throw new UnsupportedOperationException(); > + } > + }; > + } > + } > } > > Added: > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java?rev=1304485&view=auto > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java > (added) > +++ > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java > Fri Mar 23 17:05:58 2012 > @@ -0,0 +1,39 @@ > +package org.apache.lucene.util; > + > +import java.util.*; > + > +import org.junit.Assert; > +import org.junit.Test; > + > +public class TestIdentityHashSet extends LuceneTestCase { > + @Test > + public void testCheck() { > + Random rnd = random; > + Set<Object> jdk = Collections.newSetFromMap( > + new IdentityHashMap<Object,Boolean>()); > + RamUsageEstimator.IdentityHashSet<Object> us = new > RamUsageEstimator.IdentityHashSet<Object>(); > + > + int max = 100000; > + int threshold = 256; > + for (int i = 0; i < max; i++) { > + // some of these will be interned and some will not so there will be > collisions. 
> + Integer v = rnd.nextInt(threshold); > + > + boolean e1 = jdk.contains(v); > + boolean e2 = us.contains(v); > + Assert.assertEquals(e1, e2); > + > + e1 = jdk.add(v); > + e2 = us.add(v); > + Assert.assertEquals(e1, e2); > + } > + > + Set<Object> collected = Collections.newSetFromMap( > + new IdentityHashMap<Object,Boolean>()); > + for (Object o : us) { > + collected.add(o); > + } > + > + Assert.assertEquals(collected, jdk); > + } > +} > > Modified: > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java?rev=1304485&r1=1304484&r2=1304485&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java > Fri Mar 23 17:05:58 2012 > @@ -22,86 +22,86 @@ import java.util.Random; > */ > > public class TestRamUsageEstimator extends LuceneTestCase { > + public void testSanity() { > + assertTrue(sizeOf(new String("test string")) > > shallowSizeOfInstance(String.class)); > > - public void testBasic() { > - assertTrue(sizeOf(new String("test strin")) > > shallowSizeOfInstance(String.class)); > - > Holder holder = new Holder(); > holder.holder = new Holder("string2", 5000L); > assertTrue(sizeOf(holder) > shallowSizeOfInstance(Holder.class)); > assertTrue(sizeOf(holder) > sizeOf(holder.holder)); > > - assertTrue(shallowSizeOfInstance(HolderSubclass.class) >= > shallowSizeOfInstance(Holder.class)); > - assertEquals(shallowSizeOfInstance(Holder.class), > shallowSizeOfInstance(HolderSubclass2.class)); > - > - String[] strings = new String[]{new String("test strin"), new > String("hollow"), new String("catchmaster")}; > + assertTrue( > + shallowSizeOfInstance(HolderSubclass.class) >= > shallowSizeOfInstance(Holder.class)); > + assertTrue( > + shallowSizeOfInstance(Holder.class) == > shallowSizeOfInstance(HolderSubclass2.class)); > + > + String[] strings = new String[] { > + new String("test string"), > + new String("hollow"), > + new String("catchmaster") > + }; > assertTrue(sizeOf(strings) > shallowSizeOf(strings)); > } > > public void testStaticOverloads() { > Random rnd = random; > - > { > - byte[] array = new byte [rnd.nextInt(1024)]; > + byte[] array = new byte[rnd.nextInt(1024)]; > assertEquals(sizeOf(array), sizeOf((Object) array)); > } > - > + > { > - boolean[] array = new boolean [rnd.nextInt(1024)]; > + boolean[] array = new boolean[rnd.nextInt(1024)]; > assertEquals(sizeOf(array), sizeOf((Object) array)); > } > - > + > { > - char[] array = new char [rnd.nextInt(1024)]; > + char[] array = new char[rnd.nextInt(1024)]; > assertEquals(sizeOf(array), sizeOf((Object) array)); > } > - > + > { > - short[] array = new short [rnd.nextInt(1024)]; > + short[] array = new short[rnd.nextInt(1024)]; > assertEquals(sizeOf(array), sizeOf((Object) array)); > } > - > + > { > - int[] array = new int [rnd.nextInt(1024)]; > + int[] array = new int[rnd.nextInt(1024)]; > assertEquals(sizeOf(array), sizeOf((Object) array)); > } > - > + > { > - float[] array = new float [rnd.nextInt(1024)]; > + float[] array = new float[rnd.nextInt(1024)]; > assertEquals(sizeOf(array), sizeOf((Object) array)); > } > - > + > { > - long[] array = new long [rnd.nextInt(1024)]; > + long[] array = new long[rnd.nextInt(1024)]; > assertEquals(sizeOf(array), 
sizeOf((Object) array)); > } > - > + > { > - double[] array = new double [rnd.nextInt(1024)]; > + double[] array = new double[rnd.nextInt(1024)]; > assertEquals(sizeOf(array), sizeOf((Object) array)); > } > } > - > + > public void testReferenceSize() { > if (!isSupportedJVM()) { > - System.err.println("WARN: Your JVM does not support the Oracle/Sun > extensions (Hotspot diagnostics, sun.misc.Unsafe),"); > - System.err.println("so the memory estimates may be inprecise."); > - System.err.println("Please report this to the Lucene mailing list, > noting your JVM version: " + > - Constants.JAVA_VENDOR + " " + Constants.JAVA_VERSION); > - } > - if (VERBOSE) { > - System.out.println("This JVM is 64bit: " + Constants.JRE_IS_64BIT); > - System.out.println("Reference size in this JVM: " + > NUM_BYTES_OBJECT_REF); > - System.out.println("Object header size in this JVM: " + > NUM_BYTES_OBJECT_HEADER); > - System.out.println("Array header size in this JVM: " + > NUM_BYTES_ARRAY_HEADER); > - System.out.println("Object alignment in this JVM: " + > NUM_BYTES_OBJECT_ALIGNMENT); > + System.err.println("WARN: Your JVM does not support certain Oracle/Sun > extensions."); > + System.err.println(" Memory estimates may be inaccurate."); > + System.err.println(" Please report this to the Lucene mailing > list. JVM version: " + RamUsageEstimator.JVM_INFO_STRING); > + for (JvmFeature f : RamUsageEstimator.getUnsupportedFeatures()) { > + System.err.println(" - " + f.toString()); > + } > } > + > assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF == 8); > if (!Constants.JRE_IS_64BIT) { > - assertEquals("For 32bit JVMs, reference size must always be 4", 4, > NUM_BYTES_OBJECT_REF); > + assertEquals("For 32bit JVMs, reference size must always be 4?", 4, > NUM_BYTES_OBJECT_REF); > } > } > - > + > @SuppressWarnings("unused") > private static class Holder { > long field1 = 5000L; > @@ -109,8 +109,7 @@ public class TestRamUsageEstimator exten > Holder holder; > long field2, field3, field4; > > - Holder() { > - } > + Holder() {} > > Holder(String name, long field1) { > this.name = name; > @@ -123,7 +122,7 @@ public class TestRamUsageEstimator exten > byte foo; > int bar; > } > - > + > private static class HolderSubclass2 extends Holder { > // empty, only inherits all fields -> size should be identical to > superclass > } > > Added: > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java?rev=1304485&view=auto > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java > (added) > +++ > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java > Fri Mar 23 17:05:58 2012 > @@ -0,0 +1,37 @@ > +package org.apache.lucene.util; > + > +import org.junit.Assert; > + > +/** > + * Check large and special graphs. 
> + */ > +public class TestRamUsageEstimatorOnWildAnimals extends LuceneTestCase { > + public static class ListElement { > + ListElement next; > + } > + > + public void testOverflowMaxChainLength() { > + int UPPERLIMIT = 100000; > + int lower = 0; > + int upper = UPPERLIMIT; > + > + while (lower + 1 < upper) { > + int mid = (lower + upper) / 2; > + try { > + ListElement first = new ListElement(); > + ListElement last = first; > + for (int i = 0; i < mid; i++) { > + last = (last.next = new ListElement()); > + } > + RamUsageEstimator.sizeOf(first); // cause SOE or pass. > + lower = mid; > + } catch (StackOverflowError e) { > + upper = mid; > + } > + } > + > + if (lower + 1 < UPPERLIMIT) { > + Assert.fail("Max object chain length till stack overflow: " + lower); > + } > + } > +} > > -- lucidimagination.com --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
