ecki commented on code in PR #781: URL: https://github.com/apache/commons-io/pull/781#discussion_r2330587757
########## src/main/java/org/apache/commons/io/FileSystem.java: ########## @@ -530,4 +623,76 @@ CharSequence trimExtension(final CharSequence cs) { final int index = indexOf(cs, '.', 0); return index < 0 ? cs : cs.subSequence(0, index); } + + private boolean isLegalFileLength(final CharSequence candidate, final Charset charset) { + if (candidate == null || candidate.length() == 0) { + return false; + } + if (lengthUnit == LengthUnit.CHARS) { + return candidate.length() <= getMaxFileNameLength(); + } + final CharsetEncoder encoder = charset.newEncoder(); + try { + final ByteBuffer buffer = encoder.encode(CharBuffer.wrap(candidate)); + return buffer.remaining() <= getMaxFileNameLength(); + } catch (CharacterCodingException e) { + // If we can't encode, it's not legal + return false; + } + } + + CharSequence truncateFileName(final CharSequence candidate, final Charset charset) { + final int maxFileNameLength = getMaxFileNameLength(); + // Character-based limit: simple substring if needed. + if (lengthUnit == LengthUnit.CHARS) { + return candidate.length() <= maxFileNameLength ? candidate : candidate.subSequence(0, maxFileNameLength); + } + + // Byte-based limit + return truncateByBytes(candidate, charset, maxFileNameLength); + } + + static CharSequence truncateByBytes(final CharSequence candidate, final Charset charset, final int maxBytes) { + // Byte-based limit + final CharsetEncoder encoder = charset.newEncoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + + if (!encoder.canEncode(candidate)) { + throw new IllegalArgumentException( + "File name contains characters that cannot be encoded with charset " + charset.name()); + } + + // Fast path: if even the worst-case expansion fits, we're done. + if (candidate.length() <= Math.floor(maxBytes / encoder.maxBytesPerChar())) { + return candidate; + } + + // Slow path: encode into a fixed-size byte buffer. 
+ final ByteBuffer out = ByteBuffer.allocate(maxBytes); + final CharBuffer in = CharBuffer.wrap(candidate); + + // Encode until the first character that would exceed the byte budget. + final CoderResult cr = encoder.encode(in, out, true); + + if (cr.isUnderflow()) { + // Entire candidate fit within maxFileNameLength bytes. + return candidate; + } + + // We ran out of space mid-encode: truncate BEFORE the offending character. + return candidate.subSequence(0, in.position()); + } + + /** + * Units of length for the file name and path length limits. + * Review Comment: In some conditions there are also different path styles; I think on Windows the JDK even used the \\?\ syntax if the path is longer than 256 to get around an API limitation. (Not sure I remember that correctly.) That was the reason why the prefix of the path also counts for special strategies. (So maybe you need a WINDOWS_UTF16_CHARS strategy (not a length mode)?) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@commons.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org