For those interested, here is the final, fixed and working code:

import akka.NotUsed
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.{HttpEntity, _}
import akka.http.scaladsl.server.Directives.{complete, _}
import akka.stream._
import akka.stream.scaladsl.{Sink, Source}
import akka.util.ByteString
import org.apache.tika.Tika

import scala.concurrent.Future

/**
  * Small HTTP service exposing `GET /media?url=...`.
  *
  * The resource at `url` is fetched, the leading bytes of its body are handed
  * to Apache Tika to guess a media type, and the complete body is streamed
  * back to the caller with that media type as its `Content-Type`.
  */
object Service extends App {

  implicit val system: ActorSystem = ActorSystem()
  implicit val materializer: ActorMaterializer = ActorMaterializer()
  implicit val executionContext: scala.concurrent.ExecutionContextExecutor = system.dispatcher

  val tika: Tika = new Tika()

  /** Number of bytes to accumulate before running detection (unless the stream ends first). */
  val minimumDetectionSize: Int = 64 * 1024

  /**
    * Detects the media type of a byte stream without losing any of its data.
    *
    * Chunks are pulled one at a time and accumulated until at least
    * [[minimumDetectionSize]] bytes are buffered or the stream completes;
    * detection then runs on whatever has been buffered.
    *
    * @param source         the byte stream to inspect
    * @param previousPrefix bytes already consumed from the stream by earlier
    *                       recursive calls; external callers keep the default
    * @return the detected media type together with a source that replays the
    *         buffered bytes followed by the rest of the stream
    */
  def detect(
      source: Source[ByteString, Any],
      previousPrefix: ByteString = ByteString.empty
  ): Future[(MediaType.Binary, Source[ByteString, NotUsed])] =
    source.prefixAndTail(1).runWith(Sink.head).flatMap { case (prefix, tail) =>
      // `prefix` holds zero or one chunk; an empty prefix means the stream is exhausted.
      val sample = prefix.foldLeft(previousPrefix)(_ ++ _)
      if (prefix.nonEmpty && sample.size < minimumDetectionSize)
        detect(tail, sample) // not enough data yet: keep buffering
      else
        detected(sample, tail)
    }

  /**
    * Runs Tika on `sample` and pairs the resulting media type with a source
    * that emits `sample` first and then everything from `rest`.
    */
  private def detected(
      sample: ByteString,
      rest: Source[ByteString, NotUsed]
  ): Future[(MediaType.Binary, Source[ByteString, NotUsed])] =
    MediaType.custom(tika.detect(sample.toArray), binary = true) match {
      case binaryType: MediaType.Binary =>
        Future.successful((binaryType, rest.prepend(Source.single(sample))))
      case other =>
        // Match instead of an unchecked cast so an unexpected type fails with a clear message.
        Future.failed(new IllegalStateException(s"Expected a binary media type but got $other"))
    }

  // NOTE(review): `url` comes straight from the caller and is fetched by this
  // server as-is; restrict the allowed targets before exposing this beyond localhost.
  val route: akka.http.scaladsl.server.Route =
    (path("media") & parameter("url")) { url =>
      get {
        val proxied = Http()
          .singleRequest(HttpRequest(uri = url))
          .flatMap(response => detect(response.entity.dataBytes))
          .map { case (mediaType, body) => HttpEntity(ContentType(mediaType), body) }
        complete(proxied)
      }
    }

  // Surface binding failures (e.g. port already in use) instead of silently
  // leaving an idle actor system running.
  Http().bindAndHandle(route, "127.0.0.1", 8080).failed.foreach { cause =>
    system.log.error(cause, "Failed to bind HTTP server to 127.0.0.1:8080")
    system.terminate()
  }

}


Have a good day,
Victor

Le jeudi 6 avril 2017 11:40:57 UTC+2, Victor a écrit :
>
> Ahh I didn't know this operator ! Thank you very much :)
>
> I did a test and it works (it's much less code):
>
> def detect(data: ByteString) = {
>   // ...Apache Tika stuff...
>   Some(MediaType.custom("text/plain", binary = 
> true).asInstanceOf[MediaType.Binary])
> }
>
>
> def detect(source: Source[ByteString, _], previousPrefix: ByteString = 
> ByteString.empty): Future[(MediaType.Binary, Source[ByteString, _])] =
>   source.prefixAndTail(1).runWith(Sink.head).flatMap { case (prefix, tail) =>
>     detect(prefix.head).fold {
>       detect(tail, prefix.head)
>     }{ mediaType =>
>       Future.successful((mediaType, tail.prepend(Source.single(previousPrefix 
> ++ prefix.head))))
>     }
>   }
>
>
> [...]
>
>
> val route =
>   path("test") {
>     get {
>       complete("123456789\n")
>     }
>   } ~
>   (path("media") & parameter("url" ? "http://127.0.0.1:8080/test")) { url =>
>     get {
>       complete(Http().singleRequest(HttpRequest(uri = url)).flatMap(response 
> => detect(response.entity.dataBytes)).map { case (mediaType, source) =>
>         HttpEntity(ContentType(mediaType), source)
>       })
>     }
>   }
>
>
> With `prefixAndTail` I ask for only 1 `ByteString` and try to detect the 
> mime-type with it, but if it's not enough, the `detect` method is executed 
> until I can figure out the mime-type.
>
> I did some benchmarks with `wrk`; here are the results:
>
> $ wrk -t8 -c32 -d10s "http://127.0.0.1:8080/test"
>
> Running 10s test @ http://127.0.0.1:8080/test
>
>  8 threads and 32 connections
>
>  Thread Stats   Avg      Stdev     Max   +/- Stdev
>
>    Latency     4.58ms   23.80ms 437.09ms   97.11%
>
>    Req/Sec     4.89k     1.21k   11.45k    75.73%
>
>  386406 requests in 10.04s, 56.38MB read
>
> Requests/sec:  38499.53
>
> Transfer/sec:  5.62MB
>
> $ wrk -t8 -c32 -d10s "http://127.0.0.1:8080/media"
>
> Running 10s test @ http://127.0.0.1:8080/media
>
>  8 threads and 32 connections
>
>  Thread Stats   Avg      Stdev     Max   +/- Stdev
>
>    Latency    15.24ms   26.91ms 338.60ms   92.50%
>
>    Req/Sec   449.52     86.50   820.00     71.46%
>
>  35837 requests in 10.04s, 5.33MB read
>
> Requests/sec:   3571.02
>
> Transfer/sec:   544.06KB
>
> It's a naïve benchmark but it's just to give me an idea about the 
> implementation.
>
> Thank you again :)
> Victor
>
> Le mercredi 5 avril 2017 23:59:03 UTC+2, Kyrylo Stokoz a écrit :
>>
>> I'm not sure, but maybe Source.prefixAndTail can help you achieve the same 
>> thing?
>>
>> You can run detection on prefix and later combine it with tail again via 
>> source.concat?
>>
>

-- 
>>>>>>>>>>      Read the docs: http://akka.io/docs/
>>>>>>>>>>      Check the FAQ: 
>>>>>>>>>> http://doc.akka.io/docs/akka/current/additional/faq.html
>>>>>>>>>>      Search the archives: https://groups.google.com/group/akka-user
--- 
You received this message because you are subscribed to the Google Groups "Akka 
User List" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/akka-user.
For more options, visit https://groups.google.com/d/optout.

Reply via email to