Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Long meta string than 32767 is not allowed #1829

Open
1 of 2 tasks
davidnadeau opened this issue Sep 6, 2024 · 1 comment
Open
1 of 2 tasks

Long meta string than 32767 is not allowed #1829

davidnadeau opened this issue Sep 6, 2024 · 1 comment
Labels
bug Something isn't working

Comments

@davidnadeau
Copy link

davidnadeau commented Sep 6, 2024

Search before asking

  • I have searched the issues and found no similar issues.

Version

0.7.0

Component(s)

Java, Other

Minimal reproduce step

FuryUtils.scala

package testing

import testing.ListingFeatures
import org.apache.fury._
import org.apache.fury.config._
import org.apache.fury.resolver.MetaContext

/** Serialization helpers for `ListingFeatures`, backed by a pooled Fury instance built with
  * class registration required and meta share enabled.
  *
  * NOTE(review): the single `context` below is installed on whichever pooled Fury instance
  * handles a call, for both serialization and deserialization — confirm against the Fury
  * documentation that a `MetaContext` may be shared across instances/threads this way.
  */
object FuryUtils {
  // Passed as both arguments to `buildThreadSafeFuryPool` below.
  val cores = Runtime.getRuntime.availableProcessors()

  // Built on first access; the registration loop at the end of this object forces it.
  lazy val fury =
    Fury
      .builder()
      .withLanguage(Language.JAVA)
      .withScalaOptimizationEnabled(true)
      .requireClassRegistration(true)
      .withMetaShare(true)
      .buildThreadSafeFuryPool(cores, cores)

  val context: MetaContext = new MetaContext()

  /** Serializes `value` to bytes after installing `context` on the Fury instance in use. */
  def toFury(value: ListingFeatures): Array[Byte] =
    fury.execute { instance =>
      instance.getSerializationContext.setMetaContext(context)
      instance.serialize(value)
    }

  /** Deserializes `bytes` after installing `context`, casting the result to `ListingFeatures`. */
  def fromFury(bytes: Array[Byte]): ListingFeatures =
    fury.execute { instance =>
      instance.getSerializationContext.setMetaContext(context)
      val decoded = instance.deserialize(bytes)
      decoded.asInstanceOf[ListingFeatures]
    }

  // Each class is looked up by name and registered, one at a time, in the order listed.
  Seq(
    "scala.collection.generic.DefaultSerializationProxy",
    "scala.collection.immutable.$colon$colon",
    "scala.collection.immutable.Nil$",
    "scala.collection.IterableFactory$ToFactory",
    "scala.collection.immutable.List$",
    "scala.collection.generic.SerializeEnd$",
    "scala.Some",
    "testing.TimestampedUserId",
    "testing.ListingCountFeatures",
    "testing.ListingTimeseriesFeatures",
    "testing.ListingFeatures"
  ).foreach(className => fury.register(Class.forName(className)))
}

ListingFeatures.scala

package testing

import java.time.Instant

/** Pairs a user id with an instant. */
case class TimestampedUserId(timestamp: Instant, userId: Long)

/** Common interface for feature families that can report whether they are empty. */
trait FamilyType { def isEmpty: Boolean }

/** Four optional count features; every field defaults to `None`. */
case class ListingCountFeatures(
  feature1: Option[Long] = None,
  feature2: Option[Long] = None,
  feature3: Option[Long] = None,
  feature4: Option[Long] = None
) extends FamilyType {

  /** True exactly when all four features are `None`, i.e. this equals the default instance. */
  def isEmpty: Boolean = this match {
    case ListingCountFeatures(None, None, None, None) => true
    case _                                            => false
  }
}

/** Four optional lists of [[TimestampedUserId]]; every field defaults to `None`. */
case class ListingTimeseriesFeatures(
  feature5: Option[List[TimestampedUserId]] = None,
  feature6: Option[List[TimestampedUserId]] = None,
  feature7: Option[List[TimestampedUserId]] = None,
  feature8: Option[List[TimestampedUserId]] = None
) extends FamilyType {

  /** True exactly when all four features are `None`, i.e. this equals the default instance. */
  def isEmpty: Boolean = this match {
    case ListingTimeseriesFeatures(None, None, None, None) => true
    case _                                                 => false
  }
}

/** Top-level record: an id plus optional count and timeseries feature families,
  * both of which default to `None`.
  */
case class ListingFeatures(
  id: Long,
  counts: Option[ListingCountFeatures] = None,
  timeseries: Option[ListingTimeseriesFeatures] = None
)

Generators.scala (generates data for the test)

package testing

package testing

import java.time.Instant

/** Builds one sample `ListingFeatures` value from a random generator seeded with 0.
  *
  * The random longs and list sizes come from the seeded generator; the timestamps come from
  * `Instant.now()` at construction time.
  */
object Generators {
  val random = new scala.util.Random(0)

  val listingFeatures = {
    // Draws a size via `random.between(0, 51)`, then fills a list of that size with
    // (now, random long) pairs.
    def timestampedUsers(): Option[List[TimestampedUserId]] = {
      val size = random.between(0, 51)
      Some(List.fill(size)(TimestampedUserId(Instant.now(), random.nextLong())))
    }

    // Values are drawn in the same sequence as the fields are declared: id, counts, timeseries.
    val listingId = random.nextLong()

    val countFeatures = ListingCountFeatures(
      feature1 = Some(random.nextLong()),
      feature2 = Some(random.nextLong()),
      feature3 = Some(random.nextLong()),
      feature4 = Some(random.nextLong())
    )

    val timeseriesFeatures = ListingTimeseriesFeatures(
      feature5 = timestampedUsers(),
      feature6 = timestampedUsers(),
      feature7 = timestampedUsers(),
      feature8 = timestampedUsers()
    )

    ListingFeatures(
      id = listingId,
      counts = Some(countFeatures),
      timeseries = Some(timeseriesFeatures)
    )
  }
}

SerializationBenchmarks.scala

package testing

import testing.serialization._
import org.openjdk.jmh.annotations._

import java.util.concurrent.TimeUnit

/** JMH benchmark that serializes a `ListingFeatures` value with Fury and reads it back. */
@State(Scope.Benchmark)
class SerializationBenchmarks {
  // Assigned in `setup()`.
  var features: ListingFeatures = _

  /** Loads the sample value from `Generators`. */
  @Setup(Level.Trial)
  def setup(): Unit =
    features = Generators.listingFeatures

  /** Serializes `features` to bytes, then deserializes those bytes and returns the result. */
  @Benchmark
  @BenchmarkMode(Array(Mode.AverageTime))
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  @Warmup(iterations = 100)
  @Measurement(iterations = 10000)
  def furySerializationRoundTrip(): ListingFeatures = {
    val encoded = FuryUtils.toFury(features)
    FuryUtils.fromFury(encoded)
  }
}

build.sbt

// Build-wide settings.
ThisBuild / version      := "0.0.0"
ThisBuild / scalaVersion := "2.13.14"

// Single root project: Fury core plus the JMH core and annotation-processor artifacts.
lazy val root = project
  .in(file("."))
  .settings(
    name := "testing",
    libraryDependencies ++= Seq(
      "org.apache.fury" % "fury-core"                % "0.7.0",
      "org.openjdk.jmh" % "jmh-core"                 % "1.37",
      "org.openjdk.jmh" % "jmh-generator-annprocess" % "1.37"
    )
  )

What did you expect to see?

The benchmark to run.

What did you see instead?

[info] java.lang.RuntimeException: java.lang.IllegalArgumentException: Long meta string than 32767 is not allowed
[info]  at org.apache.fury.pool.ThreadPoolFury.execute(ThreadPoolFury.java:82)
[info]  at testing.serialization.FuryUtils$.fromFury(FuryUtils.scala:29)
[info]  at testing.SerializationBenchmarks.furySerializationRoundTrip(SerializationBenchmarks.scala:23)
[info]  at com.etsy.cachetesting.jmh_generated.SerializationBenchmarks_furySerializationRoundTrip_jmhTest.furySerializationRoundTrip_avgt_jmhStub(SerializationBenchmarks_furySerializationRoundTrip_jmhTest.java:236)
[info]  at com.etsy.cachetesting.jmh_generated.SerializationBenchmarks_furySerializationRoundTrip_jmhTest.furySerializationRoundTrip_AverageTime(SerializationBenchmarks_furySerializationRoundTrip_jmhTest.java:176)
[info]  at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
[info]  at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
[info]  at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
[info]  at java.base/java.lang.reflect.Method.invoke(Method.java:568)
[info]  at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:527)
[info]  at org.openjdk.jmh.runner.BenchmarkHandler$BenchmarkTask.call(BenchmarkHandler.java:504)
[info]  at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
[info]  at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:539)
[info]  at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
[info]  at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
[info]  at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
[info]  at java.base/java.lang.Thread.run(Thread.java:833)
[info] Caused by: java.lang.IllegalArgumentException: Long meta string than 32767 is not allowed
[info]  at org.apache.fury.util.Preconditions.checkArgument(Preconditions.java:58)
[info]  at org.apache.fury.meta.MetaStringEncoder.encode(MetaStringEncoder.java:80)
[info]  at org.apache.fury.resolver.ClassResolver.populateBytesToClassInfo(ClassResolver.java:1713)
[info]  at org.apache.fury.resolver.ClassResolver.loadBytesToClassInfo(ClassResolver.java:1699)
[info]  at org.apache.fury.resolver.ClassResolver.readClassInternal(ClassResolver.java:1588)
[info]  at org.apache.fury.serializer.ReplaceResolveSerializer.readObject(ReplaceResolveSerializer.java:310)
[info]  at org.apache.fury.serializer.ReplaceResolveSerializer.read(ReplaceResolveSerializer.java:305)
[info]  at org.apache.fury.Fury.readData(Fury.java:923)
[info]  at org.apache.fury.serializer.ReplaceResolveSerializer.read(ReplaceResolveSerializer.java:284)
[info]  at org.apache.fury.serializer.collection.CollectionSerializers$JDKCompatibleCollectionSerializer.read(CollectionSerializers.java:743)
[info]  at scala.SomeFuryCodec_1_1108411398_52533857.read(SomeFuryCodec_1_1108411398_52533857.java:70)
[info]  at testing.ListingTimeseriesFeaturesFuryCodec_1_1108411398_981481512.readFields$(ListingTimeseriesFeaturesFuryCodec_1_1108411398_981481512.java:137)
[info]  at testing.ListingTimeseriesFeaturesFuryCodec_1_1108411398_981481512.read(ListingTimeseriesFeaturesFuryCodec_1_1108411398_981481512.java:171)
[info]  at scala.SomeFuryCodec_1_1108411398_52533857.read(SomeFuryCodec_1_1108411398_52533857.java:70)
[info]  at com.etsy.cachetesting.ListingFeaturesFuryCodec_1_1108411398_857240609.readFields$(ListingFeaturesFuryCodec_1_1108411398_857240609.java:97)
[info]  at com.etsy.cachetesting.ListingFeaturesFuryCodec_1_1108411398_857240609.read(ListingFeaturesFuryCodec_1_1108411398_857240609.java:118)
[info]  at org.apache.fury.Fury.readDataInternal(Fury.java:955)
[info]  at org.apache.fury.Fury.readRef(Fury.java:857)
[info]  at org.apache.fury.Fury.deserialize(Fury.java:789)
[info]  at org.apache.fury.Fury.deserialize(Fury.java:711)
[info]  at testing.serialization.FuryUtils$.$anonfun$fromFury$1(FuryUtils.scala:31)
[info]  at org.apache.fury.pool.ThreadPoolFury.execute(ThreadPoolFury.java:79)
[info]  ... 16 more

Anything Else?

This error occurs when forcing class registration. When I turn class registration off, I instead get:

[info] Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 447 out of bounds for length 14
[info]  at org.apache.fury.collection.ObjectArray.get(ObjectArray.java:62)
[info]  at org.apache.fury.resolver.ClassResolver.readClassInfoWithMetaShare(ClassResolver.java:1348)
[info]  at org.apache.fury.resolver.ClassResolver.readClassInfo(ClassResolver.java:1638)
[info]  at scala.collection.immutable._colon_colonFuryMetaShared8782623466212560208Codec_1_1108411398_975194116.readFields$(_colon_colonFuryMetaShared8782623466212560208Codec_1_1108411398_975194116.java:73)
[info]  at scala.collection.immutable._colon_colonFuryMetaShared8782623466212560208Codec_1_1108411398_975194116.read(_colon_colonFuryMetaShared8782623466212560208Codec_1_1108411398_975194116.java:122)
[info]  at scala.SomeFuryMetaShared2848185738884915280Codec_1_1108411398_52533857.read(SomeFuryMetaShared2848185738884915280Codec_1_1108411398_52533857.java:47)
[info]  at com.etsy.cachetesting.ListingTimeseriesFeaturesFuryMetaShared7510468037580857040Codec_1_1108411398_981481512.readFields$(ListingTimeseriesFeaturesFuryMetaShared7510468037580857040Codec_1_1108411398_981481512.java:51)
[info]  at com.etsy.cachetesting.ListingTimeseriesFeaturesFuryMetaShared7510468037580857040Codec_1_1108411398_981481512.read(ListingTimeseriesFeaturesFuryMetaShared7510468037580857040Codec_1_1108411398_981481512.java:80)
[info]  at scala.SomeFuryMetaShared2848185738884915280Codec_1_1108411398_52533857.read(SomeFuryMetaShared2848185738884915280Codec_1_1108411398_52533857.java:47)
[info]  at com.etsy.cachetesting.ListingFeaturesFuryMetaShared8713304253468170576Codec_1_1108411398_857240609.readFields$(ListingFeaturesFuryMetaShared8713304253468170576Codec_1_1108411398_857240609.java:53)
[info]  at com.etsy.cachetesting.ListingFeaturesFuryMetaShared8713304253468170576Codec_1_1108411398_857240609.read(ListingFeaturesFuryMetaShared8713304253468170576Codec_1_1108411398_857240609.java:66)
[info]  at org.apache.fury.Fury.readDataInternal(Fury.java:955)
[info]  at org.apache.fury.Fury.readRef(Fury.java:857)
[info]  at org.apache.fury.Fury.deserialize(Fury.java:789)
[info]  at org.apache.fury.Fury.deserialize(Fury.java:711)
[info]  at com.etsy.cachetesting.serialization.FuryUtils$.$anonfun$fromFury$1(FuryUtils.scala:31)
[info]  at org.apache.fury.pool.ThreadPoolFury.execute(ThreadPoolFury.java:79)
[info]  ... 16 more

Are you willing to submit a PR?

  • I'm willing to submit a PR!
@davidnadeau davidnadeau added the bug Something isn't working label Sep 6, 2024
@davidnadeau davidnadeau changed the title Java heap space OOM using shared meta Sep 6, 2024
@chaokunyang
Copy link
Collaborator

chaokunyang commented Sep 10, 2024

Hi @davidnadeau, could you try the Fury snapshot jar instead? I believe this issue has already been fixed on the main branch in #1812.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
2 participants