This is an automated email from the ASF dual-hosted git repository. rcordier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit fd83dae0bf5b626c6d0276523307ec1816f48b3e Author: Benoit TELLIER <[email protected]> AuthorDate: Tue Mar 26 15:21:19 2024 +0100 [PERF] JMAP download: simplify part parsing Overall computing unused message sizes takes up to 2% of the CPU of the JMAP pods --- .../org/apache/james/jmap/mail/EmailBodyPart.scala | 20 +-- .../james/jmap/mail/MinimalEmailBodyPart.scala | 138 +++++++++++++++++++++ .../apache/james/jmap/routes/DownloadRoutes.scala | 14 +-- 3 files changed, 148 insertions(+), 24 deletions(-) diff --git a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/mail/EmailBodyPart.scala b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/mail/EmailBodyPart.scala index 28ecc58c62..b885e079fd 100644 --- a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/mail/EmailBodyPart.scala +++ b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/mail/EmailBodyPart.scala @@ -19,7 +19,6 @@ package org.apache.james.jmap.mail -import java.io.OutputStream import java.time.ZoneId import cats.implicits._ @@ -33,15 +32,13 @@ import org.apache.james.jmap.api.model.Size.Size import org.apache.james.jmap.core.Properties import org.apache.james.jmap.mail.EmailBodyPart.{FILENAME_PREFIX, MDN_TYPE, MULTIPART_ALTERNATIVE, TEXT_HTML, TEXT_PLAIN, of} import org.apache.james.jmap.mail.PartId.PartIdValue -import org.apache.james.jmap.mime4j.{ JamesBodyDescriptorBuilder, SizeUtils} -import org.apache.james.mailbox.model.{Cid, MessageAttachmentMetadata, MessageResult} +import org.apache.james.jmap.mime4j. SizeUtils +import org.apache.james.mailbox.model.{Cid, MessageAttachmentMetadata} import org.apache.james.mime4j.Charsets.ISO_8859_1 import org.apache.james.mime4j.codec.{DecodeMonitor, DecoderUtil} import org.apache.james.mime4j.dom.field.{ContentDispositionField, ContentLanguageField, ContentTypeField, FieldName} import org.apache.james.mime4j.dom.{Entity, Message, Multipart, TextBody => Mime4JTextBody} -import org.apache.james.mime4j.field.LenientFieldParser -import org.apache.james.mime4j.message.{BasicBodyFactory, DefaultMessageBuilder} -import org.apache.james.mime4j.stream.{Field, MimeConfig, RawField} +import org.apache.james.mime4j.stream.{Field, RawField} import org.apache.james.util.html.HtmlTextExtractor import scala.jdk.CollectionConverters._ @@ -77,17 +74,6 @@ object EmailBodyPart { val defaultProperties: Properties = Properties("partId", "blobId", "size", "name", "type", "charset", "disposition", "cid", "language", "location") val allowedProperties: Properties = defaultProperties ++ Properties("subParts", "headers") - def ofMessage(properties: Option[Properties], zoneId: ZoneId, blobId: BlobId, message: MessageResult): Try[EmailBodyPart] = { - val defaultMessageBuilder = new DefaultMessageBuilder - defaultMessageBuilder.setMimeEntityConfig(MimeConfig.PERMISSIVE) - defaultMessageBuilder.setDecodeMonitor(DecodeMonitor.SILENT) - defaultMessageBuilder.setBodyDescriptorBuilder(new JamesBodyDescriptorBuilder(null, LenientFieldParser.getParser, DecodeMonitor.SILENT)) - defaultMessageBuilder.setBodyFactory(new BasicBodyFactory(Email.defaultCharset)) - - val mime4JMessage = Try(defaultMessageBuilder.parseMessage(message.getFullContent.getInputStream)) - mime4JMessage.flatMap(of(properties, zoneId, blobId, _)) - } - def fromAttachment(properties: Option[Properties], zoneId: ZoneId, attachment: MessageAttachmentMetadata, entity: Message): EmailBodyPart = { def parseDisposition(attachment: MessageAttachmentMetadata): Option[Disposition] = if (attachment.isInline) { diff --git a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/mail/MinimalEmailBodyPart.scala b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/mail/MinimalEmailBodyPart.scala new file mode 100644 index 0000000000..56a03904f3 --- /dev/null +++ b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/mail/MinimalEmailBodyPart.scala @@ -0,0 +1,138 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.jmap.mail + +import java.time.ZoneId + +import cats.implicits._ +import eu.timepit.refined.auto._ +import eu.timepit.refined.numeric.NonNegative +import eu.timepit.refined.refineV +import org.apache.james.jmap.api.model.Size.Size +import org.apache.james.jmap.core.Properties +import org.apache.james.jmap.mail.MinimalEmailBodyPart.of +import org.apache.james.jmap.mime4j.{JamesBodyDescriptorBuilder, SizeUtils} +import org.apache.james.mailbox.model.MessageResult +import org.apache.james.mime4j.codec.DecodeMonitor +import org.apache.james.mime4j.dom.{Entity, Message, Multipart} +import org.apache.james.mime4j.field.LenientFieldParser +import org.apache.james.mime4j.message.{BasicBodyFactory, DefaultMessageBuilder} +import org.apache.james.mime4j.stream.MimeConfig + +import scala.jdk.CollectionConverters._ +import scala.util.{Failure, Success, Try} + +object MinimalEmailBodyPart { + val TEXT_PLAIN: Type = Type("text/plain") + val TEXT_HTML: Type = Type("text/html") + val MDN_TYPE: Type = Type("message/disposition-notification") + val MULTIPART_ALTERNATIVE: Type = Type("multipart/alternative") + val FILENAME_PREFIX = "name" + + val defaultProperties: Properties = Properties("partId", "blobId", "size", "name", "type", "charset", "disposition", "cid", "language", "location") + val allowedProperties: Properties = defaultProperties ++ Properties("subParts", "headers") + + def ofMessage(properties: Option[Properties], zoneId: ZoneId, blobId: BlobId, message: MessageResult): Try[MinimalEmailBodyPart] = { + val defaultMessageBuilder = new DefaultMessageBuilder + defaultMessageBuilder.setMimeEntityConfig(MimeConfig.PERMISSIVE) + defaultMessageBuilder.setDecodeMonitor(DecodeMonitor.SILENT) + defaultMessageBuilder.setBodyDescriptorBuilder(new JamesBodyDescriptorBuilder(null, LenientFieldParser.getParser, DecodeMonitor.SILENT)) + defaultMessageBuilder.setBodyFactory(new BasicBodyFactory(Email.defaultCharset)) + + val mime4JMessage = Try(defaultMessageBuilder.parseMessage(message.getFullContent.getInputStream)) + mime4JMessage.flatMap(of(properties, zoneId, blobId, _)) + } + + def of(properties: Option[Properties], zoneId: ZoneId, blobId: BlobId, message: Message): Try[MinimalEmailBodyPart] = + of(properties, zoneId, blobId, PartId(1), message).map(_._1) + + private def of(properties: Option[Properties], zoneId: ZoneId, blobId: BlobId, partId: PartId, entity: Entity): Try[(MinimalEmailBodyPart, PartId)] = + entity.getBody match { + case multipart: Multipart => + val scanResults: Try[List[(Option[MinimalEmailBodyPart], PartId)]] = multipart.getBodyParts + .asScala.toList + .scanLeft[Try[(Option[MinimalEmailBodyPart], PartId)]](Success((None, partId)))(traverse(properties, zoneId, blobId)) + .sequence + val highestPartIdValidation: Try[PartId] = scanResults.map(list => list.map(_._2).reverse.headOption.getOrElse(partId)) + val childrenValidation: Try[List[MinimalEmailBodyPart]] = scanResults.map(list => list.flatMap(_._1)) + + zip(childrenValidation, highestPartIdValidation) + .flatMap { + case (children, highestPartId) => of(None, partId, entity, Some(children)) + .map(part => (part, highestPartId)) + } + case _ => BlobId.of(blobId, partId) + .flatMap(blobId => of(Some(blobId), partId, entity, None)) + .map(part => (part, partId)) + } + + private def traverse(properties: Option[Properties], zoneId: ZoneId, blobId: BlobId)(acc: Try[(Option[MinimalEmailBodyPart], PartId)], entity: Entity): Try[(Option[MinimalEmailBodyPart], PartId)] = { + acc.flatMap { + case (_, previousPartId) => + val partId = previousPartId.next + + of(properties, zoneId, blobId, partId, entity) + .map({ + case (part, partId) => (Some(part), partId) + }) + } + } + + private def of(blobId: Option[BlobId], + partId: PartId, + entity: Entity, + subParts: Option[List[MinimalEmailBodyPart]]): Try[MinimalEmailBodyPart] = + Try(MinimalEmailBodyPart( + partId = partId, + blobId = blobId, + headers = entity.getHeader.getFields.asScala.toList.map(EmailHeader(_)), + `type` = Type(entity.getMimeType), + subParts = subParts, + entity = entity)) + + private def zip[A, B](a: Try[A], b: Try[B]): Try[(A, B)] = for { + aValue <- a + bValue <- b + } yield (aValue, bValue) +} +case class MinimalEmailBodyPart(partId: PartId, + blobId: Option[BlobId], + headers: List[EmailHeader], + `type`: Type, + subParts: Option[List[MinimalEmailBodyPart]], + entity: Entity) { + + def partWithBlobId(blobId: BlobId): Option[MinimalEmailBodyPart] = flatten.find(_.blobId.contains(blobId)) + + def nested(zoneId: ZoneId): Option[MinimalEmailBodyPart] = entity.getBody match { + case message: Message => of(None, zoneId, blobId.get, message).toOption + case _ => None + } + + def size: Try[Size] = refineSize(SizeUtils.sizeOf(entity)) + + private def refineSize(l: Long): Try[Size] = refineV[NonNegative](l) match { + case scala.Right(size) => Success(size) + case Left(e) => Failure(new IllegalArgumentException(e)) + } + + + def flatten: List[MinimalEmailBodyPart] = subParts.getOrElse(Nil).flatMap(part => part.flatten) ++ List(this) +} diff --git a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/routes/DownloadRoutes.scala b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/routes/DownloadRoutes.scala index ba047608bd..ce7ae1e87d 100644 --- a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/routes/DownloadRoutes.scala +++ b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/routes/DownloadRoutes.scala @@ -42,7 +42,7 @@ import org.apache.james.jmap.exceptions.UnauthorizedException import org.apache.james.jmap.http.Authenticator import org.apache.james.jmap.http.rfc8621.InjectionKeys import org.apache.james.jmap.json.ResponseSerializer -import org.apache.james.jmap.mail.{BlobId, EmailBodyPart} +import org.apache.james.jmap.mail.{BlobId, EmailBodyPart, MinimalEmailBodyPart} import org.apache.james.jmap.method.{AccountNotFoundException, ZoneIdProvider} import org.apache.james.jmap.routes.DownloadRoutes.{BUFFER_SIZE, LOGGER} import org.apache.james.jmap.{Endpoint, JMAPRoute, JMAPRoutes} @@ -124,8 +124,8 @@ case class AttachmentBlob(attachmentMetadata: AttachmentMetadata, fileContent: I override def blobId: BlobId = BlobId.of(attachmentMetadata.getAttachmentId.getId).get } -case class EmailBodyPartBlob(blobId: BlobId, part: EmailBodyPart) extends Blob { - override def size: Try[Size] = Success(part.size) +case class EmailBodyPartBlob(blobId: BlobId, part: MinimalEmailBodyPart) extends Blob { + override def size: Try[Size] = part.size override def contentType: ContentType = ContentType.of(part.`type`.value) @@ -211,13 +211,13 @@ class MessagePartBlobResolver @Inject()(val messageIdFactory: MessageId.Factory, case Success((messageId, blobIds)) => Applicable(SMono.fromPublisher( messageIdManager.getMessagesReactive(List(messageId).asJava, FetchGroup.FULL_CONTENT, mailboxSession)) - .handle[EmailBodyPart] { - case (message, sink) => EmailBodyPart.ofMessage(None, zoneIdSupplier.get(), BlobId.of(messageId).get, message) + .handle[MinimalEmailBodyPart] { + case (message, sink) => MinimalEmailBodyPart.ofMessage(None, zoneIdSupplier.get(), BlobId.of(messageId).get, message) .fold(sink.error, sink.next) } - .handle[EmailBodyPart] { + .handle[MinimalEmailBodyPart] { case (bodyStructure, sink) => - blobIds.foldLeft[Option[EmailBodyPart]](Some(bodyStructure)) { + blobIds.foldLeft[Option[MinimalEmailBodyPart]](Some(bodyStructure)) { case (None, _) => None case (Some(nestedBodyStructure), blobId) => nestedBodyStructure.partWithBlobId(blobId) .orElse(nestedBodyStructure.nested(zoneIdSupplier.get()).flatMap(_.partWithBlobId(blobId))) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
