chibenwa commented on a change in pull request #808:
URL: https://github.com/apache/james-project/pull/808#discussion_r780162174



##########
File path: server/queue/queue-pulsar/src/main/scala/org/apache/james/queue/pulsar/PulsarMailQueue.scala
##########
@@ -0,0 +1,600 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.queue.pulsar
+
+import akka.actor.{ActorRef, ActorSystem}
+import akka.stream.scaladsl.{Flow, Keep, RunnableGraph, Sink, Source, SourceQueueWithComplete, StreamConverters}
+import akka.stream.{Attributes, OverflowStrategy}
+import akka.util.Timeout
+import akka.{Done, NotUsed}
+import com.sksamuel.pulsar4s._
+import com.sksamuel.pulsar4s.akka.streams
+import com.sksamuel.pulsar4s.akka.streams.{CommittableMessage, Control}
+import org.apache.james.backends.pulsar.{PulsarConfiguration, PulsarReader}
+import org.apache.james.blob.api.{BlobId, Store}
+import org.apache.james.blob.mail.MimeMessagePartsId
+import org.apache.james.core.{MailAddress, MaybeSender}
+import org.apache.james.metrics.api.{GaugeRegistry, MetricFactory}
+import org.apache.james.queue.api.MailQueue._
+import org.apache.james.queue.api._
+import org.apache.james.queue.pulsar.EnqueueId.EnqueueId
+import org.apache.james.server.core.MailImpl
+import org.apache.mailet._
+import org.apache.pulsar.client.admin.PulsarAdmin
+import org.apache.pulsar.client.admin.PulsarAdminException.NotFoundException
+import org.apache.pulsar.client.api.{Schema, SubscriptionInitialPosition, SubscriptionType}
+import org.reactivestreams.Publisher
+import play.api.libs.json._
+
+import java.time.{Instant, ZonedDateTime, Duration => JavaDuration}
+import java.util.concurrent.TimeUnit
+import java.util.{Date, UUID}
+import javax.mail.MessagingException
+import javax.mail.internet.MimeMessage
+import scala.concurrent.duration._
+import scala.concurrent.{Await, ExecutionContext, ExecutionContextExecutor, Future, Promise}
+import scala.jdk.CollectionConverters._
+import scala.jdk.DurationConverters._
+import scala.math.Ordered.orderingToOrdered
+
+private[pulsar] object serializers {
+  implicit val headerFormat: Format[Header] = Json.format[Header]
+  implicit val enqueueIdFormat: Format[EnqueueId] = new Format[EnqueueId] {
+    override def writes(o: EnqueueId): JsValue = JsString(o.value)
+
+    override def reads(json: JsValue): JsResult[EnqueueId] =
+      json.validate[String].map(EnqueueId.apply).flatMap(_.fold(JsError.apply, JsSuccess(_)))
+  }
+  implicit val mailMetadataFormat: Format[MailMetadata] = Json.format[MailMetadata]
+}
+
+private[pulsar] object schemas {
+  implicit val schema: Schema[String] = Schema.STRING
+}
+
+/**
+ * In order to implement removal of mails from the queue, `PulsarMailQueue` makes use of a topic
+ * to which filters are pushed. That way, all instances of `PulsarMailQueue` in a cluster
+ * eventually start dropping mails matching filters, effectively removing them from mail processing.
+ *
+ * The filtering is handled by a `FilterStage` Actor which maintains a set of active filters published by
+ * the `remove` method. It is responsible for dropping filters that can no longer match any message, and for
+ * providing the `browse` method with a consistent view of the messages that will be processed.
+ *
+ * A filter cannot remove messages that are enqueued after the call to the `remove` method.
+ */
+class PulsarMailQueue(
+  name: MailQueueName,
+  config: PulsarConfiguration,
+  blobIdFactory: BlobId.Factory,
+  mimeMessageStore: Store[MimeMessage, MimeMessagePartsId],
+  mailQueueItemDecoratorFactory: MailQueueItemDecoratorFactory,
+  metricFactory: MetricFactory,
+  gaugeRegistry: GaugeRegistry,
+  system: ActorSystem
+) extends MailQueue with ManageableMailQueue {
+
+  import schemas._
+  import serializers._
+
+  type MessageAsJson = String
+
+  private val enqueueBufferSize = 10
+  private val requeueBufferSize = 10
+  private val awaitTimeout = 10.seconds
+
+  gaugeRegistry.register(QUEUE_SIZE_METRIC_NAME_PREFIX + name, () => getSize)
+  private val dequeueMetrics = metricFactory.generate(DEQUEUED_METRIC_NAME_PREFIX + name.asString)
+  private val enqueueMetric = metricFactory.generate(ENQUEUED_METRIC_NAME_PREFIX + name.asString)
+
+  private implicit val implicitSystem: ActorSystem = system
+  private implicit val ec: ExecutionContextExecutor = system.dispatcher
+  private implicit val implicitBlobIdFactory: BlobId.Factory = blobIdFactory
+  private implicit val client: PulsarAsyncClient = PulsarClient(config.brokerUri)
+  private val admin = {
+    val builder = PulsarAdmin.builder()
+    builder.serviceHttpUrl(config.adminUri).build()
+  }
+
+  private val outTopic = Topic(s"persistent://${config.namespace.asString}/James-${name.asString()}")
+  private val scheduledTopic = Topic(s"persistent://${config.namespace.asString}/${name.asString()}-scheduled")
+  private val filterTopic = Topic(s"persistent://${config.namespace.asString}/pmq-filter-${name.asString()}")
+  private val subscription = Subscription("subscription-" + name.asString())
+  private val scheduledSubscription = Subscription("scheduled-subscription-" + name.asString())
+
+  private val outTopicProducer = client.producer(ProducerConfig(outTopic, enableBatching = Some(false)))
+  private val scheduledTopicProducer = client.producer(ProducerConfig(scheduledTopic, enableBatching = Some(false)))
+
+  private val filterProducer = client.producer(ProducerConfig(filterTopic, enableBatching = Some(false)))
+
+  private def completingSinkOf[U](producer: Producer[U]): Sink[(ProducerMessage[U], Promise[Done]), NotUsed] =
+    Flow.fromFunction[(ProducerMessage[U], Promise[Done]), Unit] {
+      case (message, promise) => producer
+        .sendAsync(message)
+        .onComplete(enqueued => promise.complete(enqueued.map(_ => Done)))
+    }
+      .to(Sink.ignore)
+
+  private def sinkOf[U](producer: Producer[U]): Sink[ProducerMessage[U], NotUsed] =
+    Flow.fromFunction[ProducerMessage[U], Unit](producer.sendAsync(_)).to(Sink.ignore)
+
+  def debugLogger[T](loggerName: String): Flow[T, T, NotUsed] =
+    Flow.apply[T]
+      .log(loggerName)
+      .addAttributes(
+        Attributes.logLevels(
+          onElement = Attributes.LogLevels.Debug,
+          onFinish = Attributes.LogLevels.Debug,
+          onFailure = Attributes.LogLevels.Error)
+      )
+
+  private val saveMail = (mail: Mail, duration: Duration, enqueued: Promise[Done]) =>
+    Source.fromPublisher(saveMimeMessage(mail.getMessage))
+      .map { partsId =>
+        val mailMetadata = MailMetadata.of(EnqueueId.generate(), mail, partsId)
+        val payload = Json.stringify(Json.toJson(mailMetadata))
+        (payload, duration, enqueued)
+      }
+
+  private val buildProducerMessage =
+    Flow.fromFunction[(MessageAsJson, Duration, Promise[Done]), (ProducerMessage[MessageAsJson], Promise[Done])] {
+      case (payload, duration, enqueued) =>
+        duration match {
+          case _: Duration.Infinite =>
+            (ProducerMessage(payload) -> enqueued)
+          case duration: FiniteDuration =>
+            val deliverAt = ZonedDateTime.now().plus(duration.toJava).toInstant
+            (DefaultProducerMessage(key = None, value = payload, deliverAt = Some(deliverAt.toEpochMilli), eventTime = Some(EventTime(deliverAt.toEpochMilli))) -> enqueued)
+        }
+    }
+
+  private def isScheduled(producerMessage: ProducerMessage[_]) = producerMessage.deliverAt.isDefined
+
+  /**
+   * All messages are first enqueued here
+   */
+  private val enqueueFlow: RunnableGraph[SourceQueueWithComplete[(Mail, Duration, Promise[Done])]] =
+    Source
+      .queue[(Mail, Duration, Promise[Done])](enqueueBufferSize, OverflowStrategy.backpressure)
+      .flatMapConcat(saveMail.tupled)
+      .via(buildProducerMessage)
+      .wireTap(_ => enqueueMetric.increment())
+      .via(debugLogger("enqueue"))
+      .divertTo(completingSinkOf(scheduledTopicProducer), { case (message, _) => isScheduled(message) })
+      .to(completingSinkOf(outTopicProducer))
+
+  /**
+   * Scheduled messages go through this source when their delay expires
+   */
+  private val requeueFlow: RunnableGraph[SourceQueueWithComplete[ProducerMessage[MessageAsJson]]] = Source
+    .queue[ProducerMessage[MessageAsJson]](requeueBufferSize, OverflowStrategy.backpressure)
+    .via(debugLogger("requeue"))
+    .to(sinkOf(outTopicProducer))
+
+  private def buildConsumer(subscription: Subscription, topic: Topic): Consumer[String] =
+    client.consumer(
+      ConsumerConfig(
+        subscriptionName = subscription,
+        topics = Seq(topic),
+        subscriptionType = Some(SubscriptionType.Shared),
+        subscriptionInitialPosition = Some(SubscriptionInitialPosition.Earliest),
+        negativeAckRedeliveryDelay = Some(1.second)
+      )
+    )
+
+  def consumer(): Consumer[String] = buildConsumer(subscription, outTopic)
+
+  def scheduledConsumer(): Consumer[String] = buildConsumer(scheduledSubscription, scheduledTopic)
+
+  private val requeueMessage = Flow.apply[CommittableMessage[String]]
+    .flatMapConcat(message => Source.future(requeue.offer(ProducerMessage(message.message.value)).map(_ => message)))
+    .flatMapConcat(message => Source.future(message.ack(cumulative = false)))
+    .toMat(Sink.ignore)(Keep.none)
+
+  private val requeueScheduledMessages =
+    streams.committableSource(scheduledConsumer)
+      .toMat(requeueMessage)(Keep.left) // use toMat to keep a reference to the Control, which would be lost by direct use of flatMapConcat
+
+  private val filterStage: ActorRef = system.actorOf(FilterStage.props)
+
+  private val counter: Sink[Any, Future[Int]] =
+    Sink.fold[Int, Any](0)((acc, _) => acc + 1)
+
+  private val dequeueFlow: RunnableGraph[(Control, Publisher[MailQueueItem])] = {
+    implicit val timeout: Timeout = Timeout(1, TimeUnit.SECONDS)
+    streams.committableSource(consumer)
+      .map(message =>
+        (Json.fromJson[MailMetadata](Json.parse(message.message.value)).get,
+          message)
+      ).ask[(Option[MailMetadata], Option[MimeMessagePartsId], CommittableMessage[String])](filterStage)
+      .flatMapConcat {
+        case (None, Some(partsId), committableMessage) =>
+          committableMessage.ack()
+          deleteMimeMessage(partsId)
+            .flatMapConcat(_ => Source.empty)
+        case (Some(metadata), _, committableMessage) =>
+          val partsId = metadata.partsId
+          Source
+            .fromPublisher(readMimeMessage(partsId))
+            .map(message => (readMail(metadata, message), partsId, committableMessage))
+      }.map { case (mail, partsId, message) => new PulsarMailQueueItem(mail, partsId, message) }
+      .map(mailQueueItemDecoratorFactory.decorate(_, name))
+
+      .alsoTo(counter)
+      // akka streams virtual publisher handles a subscription timeout to the
+      // exposed publisher which will terminate the stream if the timeout is not
+      // honored. To do so, the akka stream implementation actually subscribes to
+      // the publisher.
+      // asPublisher thus requires either:
+      // * fanout=true
+      // * or to disable the subscription timeout mechanism
+      // see akka.stream.impl.VirtualPublisher.onSubscriptionTimeout
+
+      .via(debugLogger("dequeueFlow"))
+      .toMat(Sink.asPublisher[MailQueue.MailQueueItem](true).withAttributes(Attributes.inputBuffer(initial = 1, max = 1)))(Keep.both)
+  }
+
+  class PulsarMailQueueItem(mail: Mail, partsId: MimeMessagePartsId, message: CommittableMessage[String]) extends MailQueueItem {
+    override val getMail: Mail = mail
+
+    override def done(success: Boolean): Unit = {
+      if (success) {
+        dequeueMetrics.increment()
+        Await.ready(message.ack(cumulative = false), awaitTimeout)
+        deleteMimeMessage(partsId).run()
+      } else {
+        Await.ready(message.nack(), awaitTimeout)
+      }
+    }
+  }
+
+  /**
+   * For now, filterFlow always rereads the whole topic from the start by using a random subscription name.
+   * Filters are never removed from the topic.
+   * This means that the FilterStage will get slower to start as the number of filters increases; it will also consume
+   * an increasing amount of RAM until the first mail is processed, which will invalidate and purge the expired filters.
+   *
+   * @see [[FilterStage]]
+   */
+  private val filterFlow = {
+    val filterSubscription = Subscription("filter-subscription-" + name.asString() + "-" + UUID.randomUUID().toString)
+    val logInvalidFilterPayload = Flow.apply[JsResult[Filter]]
+      .collectType[JsError]
+      .map(error => "unable to parse filter " + Json.prettyPrint(JsError.toJson(error)))
+      .log("filterFlow")
+      .addAttributes(Attributes.logLevels(onElement = Attributes.LogLevels.Error)).to(Sink.ignore)
+
+    streams.source(() =>
+      client.consumer(
+        ConsumerConfig(
+          subscriptionName = filterSubscription,
+          topics = Seq(filterTopic),
+          subscriptionType = Some(SubscriptionType.Shared),
+          subscriptionInitialPosition = Some(SubscriptionInitialPosition.Earliest),
+        )
+      )
+    ).map(message => Json.fromJson[Filter](Json.parse(message.value)))
+      .divertTo(logInvalidFilterPayload, when = _.isError)
+      .map(_.get)
+      .via(debugLogger("filterFlow"))
+      .to(Sink.foreach(filter => filterStage ! filter))
+  }
+
+  def registerDequeueSubscription(): Unit = consumer().close()
+
+  def registerScheduledSubscription(): Unit = scheduledConsumer().close()
+
+  // make sure the subscription exists on the server so we can read the size
+  registerDequeueSubscription()
+  registerScheduledSubscription()
+  // the lazy val makes the process testable by enforcing some level of determinism for tests
+  private lazy val (dequeueControl: Control, dequeuePublisher: Publisher[MailQueueItem], scheduledConsumerControl: Control) = startDequeuing()
+  private val enqueue: SourceQueueWithComplete[(Mail, Duration, Promise[Done])] = enqueueFlow.run()
+  private val requeue: SourceQueueWithComplete[ProducerMessage[MessageAsJson]] = requeueFlow.run()
+  private val filterFlowControl: Control = filterFlow.run()
+
+  private def startDequeuing() = {
+    val (dequeueControl: Control, dequeuePublisher: Publisher[MailQueueItem]) = dequeueFlow.run()
+    val scheduledConsumerControl: Control = requeueScheduledMessages.run()
+    (dequeueControl, dequeuePublisher, scheduledConsumerControl)
+  }
+
+  /**
+   * @inheritdoc
+   */
+  override val getName: MailQueueName = name
+
+  /**
+   * @inheritdoc
+   */
+  override def enQueue(mail: Mail, delay: JavaDuration): Unit = syncEnqueue(mail, delay.toScala)
+
+  /**
+   * @inheritdoc
+   */
+  override def enQueue(mail: Mail): Unit = syncEnqueue(mail, Duration.Undefined)
+
+  private def syncEnqueue(mail: Mail, delay: Duration): Unit = {
+    metricFactory.decorateSupplierWithTimerMetric(
+      ENQUEUED_TIMER_METRIC_NAME_PREFIX + name.asString,
+      () => Await.result(internalEnqueue(mail, delay), awaitTimeout)
+    )
+  }
+
+  /**
+   * @inheritdoc
+   */
+  override def enqueueReactive(mail: Mail): Publisher[Void] = {
+    metricFactory.decoratePublisherWithTimerMetric(
+      ENQUEUED_TIMER_METRIC_NAME_PREFIX + name.asString,
+      Source.lazyFuture(() => internalEnqueue(mail, Duration.Undefined)).runWith(Sink.asPublisher[Void](fanout = true))
+    )
+  }
+
+  private def internalEnqueue(mail: Mail, delay: Duration) = {
+    val enqueueCompletion = Promise[Done]()
+    for {
+      _ <- enqueue.offer((mail, delay, enqueueCompletion))
+      _ <- enqueueCompletion.future
+    } yield null
+  }
+
+  /**
+   * @inheritdoc
+   */
+  override def deQueue(): Publisher[MailQueue.MailQueueItem] = dequeuePublisher
+
+  /**
+   * @inheritdoc
+   */
+  override def close(): Unit = {
+    enqueue.complete()
+    requeue.complete()
+    dequeueControl.stop()
+    scheduledConsumerControl.stop()
+    filterFlowControl.stop()
+    client.close()
+    system.terminate() //FIXME: to remove when injected
+  }
+
+  /**
+   * @inheritdoc
+   */
+  override def getSize: Long = getSize(outTopic, subscription) + getSize(scheduledTopic, scheduledSubscription)
+
+
+  private def getSize(topic: Topic, subscription: Subscription): Long = {
+    try {
+      val subscriptions = admin.topics().getStats(topic.name).getSubscriptions
+      val maybeStats = Option(subscriptions.get(subscription.name))
+      maybeStats.map(_.getMsgBacklog).getOrElse(0)
+    } catch {
+      case _: NotFoundException => 0L
+    }
+  }
+
+  /**
+   * @inheritdoc
+   */
+  override def flush(): Long = {
+    def lastScheduledMessageId(f: MessageId => Long): Long = lastMessage(scheduledTopic)
+      .map(_.messageId)
+      .fold(0L)(f)
+
+    lastScheduledMessageId { lastMessageId =>
+      val flushStart = Instant.now()
+
+      def isScheduledAfterFlush(message: ConsumerMessage[String]) = Instant.ofEpochMilli(message.eventTime.value).isAfter(flushStart)
+
+      def putMessageInOutTopic(message: ConsumerMessage[String]) = requeue.offer(ProducerMessage(message.value))
+
+      // prevents the normal dequeue flow of scheduled messages by moving the scheduled subscription to the last known message
+      admin.topics().resetCursor(scheduledTopic.name, scheduledSubscription.name, lastMessageId)
+
+      val copy = read(
+        ReaderConfig(scheduledTopic, startMessage = Message(MessageId.earliest), startMessageIdInclusive = true)
+      )
+        .filter(isScheduledAfterFlush) // avoid duplicate delivery of messages which are already handled by scheduledSubscription
+        .filter(_.messageId.underlying <= lastMessageId.underlying) // stop copying messages at lastMessageId
+        .runFold(0L) { (counter, message) =>
+          putMessageInOutTopic(message)
+          counter + 1
+        }
+      Await.result(copy, Duration.Inf)
+    }
+  }
+
+  private def read(config: ReaderConfig)(implicit executionContext: ExecutionContext): Source[ConsumerMessage[String], NotUsed] =
+    Source.unfoldResourceAsync[ConsumerMessage[String], Reader[String]](
+      create = () => {
+        Future.successful(
+          client.reader(
+            config = config
+          )
+        )
+      },
+      read = reader => {
+        if (reader.hasMessageAvailable) reader.nextAsync.map(Some(_))
+        else Future.successful(None)
+      },
+      close = reader => reader.closeAsync.map(_ => Done))
+
+
+  /**
+   * @inheritdoc
+   */
+  override def clear(): Long = {
+    val count = getSize()
+    admin.topics().delete(outTopic.name, true)
+    count
+  }

Review comment:
       Shouldn't clear also get rid of the scheduled topic?
   
   After clearing, can messages still transit through the mail queue? (I expect they should, but I do not see how this can work if we nuke the out topic... Would new producers automatically re-create a missing topic?)
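   
   To make the first point concrete, here is a rough sketch of what I have in mind — just an illustration, not a definitive implementation (it assumes dropping the delayed backlog is acceptable, and it leaves the orphaned blobs in the mimeMessageStore aside):
   
   ```scala
   override def clear(): Long = {
     val count = getSize()
     // drop the immediate backlog...
     admin.topics().delete(outTopic.name, true)
     // ...and also the delayed mails sitting in the scheduled topic
     admin.topics().delete(scheduledTopic.name, true)
     count
   }
   ```
   
   On the second point: if I read the Pulsar defaults correctly, `allowAutoTopicCreation` is `true` out of the box, so the next produce/subscribe would transparently re-create the topic. On a cluster where auto-creation is disabled, though, `clear()` would leave the queue unusable until the topic is re-created by hand.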




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


