Author: lehmi
Date: Mon Apr 21 10:53:42 2025
New Revision: 1925195

URL: http://svn.apache.org/viewvc?rev=1925195&view=rev
Log:
PDFBOX-4668: avoid NPE due to a missing font name

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java?rev=1925195&r1=1925194&r2=1925195&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Mon Apr 21 10:53:42 2025
@@ -564,17 +564,10 @@ public class PDFTextStripper extends Leg
 
                 // Resets the average character width when we see a change in font
                 // or a change in the font size
-                if (lastPosition != null)
+                if (lastPosition != null
+                        && hasFontOrSizeChanged(position, lastPosition.getTextPosition()))
                 {
-                    TextPosition lastTextPosition = lastPosition.getTextPosition();
-                    boolean fontHasChanged = !position.getFont().getName()
-                            .equals(lastTextPosition.getFont().getName());
-                    boolean fontSizeChanged = Float.compare(position.getFontSize(),
-                            lastTextPosition.getFontSize()) != 0;
-                    if (fontHasChanged || fontSizeChanged)
-                    {
-                        previousAveCharWidth = -1;
-                    }
+                    previousAveCharWidth = -1;
                 }
                 float positionX;
                 float positionY;
@@ -735,6 +728,38 @@ public class PDFTextStripper extends Leg
         writePageEnd();
     }
 
+    private boolean hasFontOrSizeChanged(TextPosition current, TextPosition last)
+    {
+        if (last == null)
+        {
+            return false;
+        }
+        // compare font sizes
+        if (Float.compare(current.getFontSize(), last.getFontSize()) != 0)
+        {
+            return true;
+        }
+        // compare font instances, may not work if the resource cache is disabled
+        if (current.getFont() == last.getFont())
+        {
+            return false;
+        }
+        String currentFontName = current.getFont().getName();
+        String lastFontName = last.getFont().getName();
+        if (currentFontName != null)
+        {
+            // compare font names
+            return !currentFontName.equals(lastFontName);
+        }
+        if (lastFontName != null)
+        {
+            // currentFontName is null but lastFontName isn't -> font changes
+            return true;
+        }
+        // both fonts don't have a name -> compare hashes
+        return current.getFont().hashCode() != last.getFont().hashCode();
+    }
+
     private boolean overlap(float y1, float height1, float y2, float height2)
     {
         return within(y1, y2, .1f) || y2 <= y1 && y2 >= y1 - height1