
I've checked a bunch of other forums and posts, but I can't narrow down the issue. All I keep seeing is people saying not to use org.apache.spark.Logging because it's deprecated, but I don't even know where I'm using it in my code.

When I run the following code:

import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.twitter._
import twitter4j.Status

object TrendingHashTags {
  def main(args: Array[String]): Unit = {
    if (args.length < 8) {
      System.err.println("Usage: TrendingHashTags <consumer key> <consumer secret> " +
                          "<access token> <access token secret> " +
                          "<language> <batch interval> <min-threshold> <show-count> " +
                          "[<filters>]")
      System.exit(1)
    }

    val Array(consumerKey, consumerSecret, accessToken, accessTokenSecret,
              lang, batchInterval, minThreshold, showCount) = args.take(8)
    val filters = args.takeRight(args.length - 8)

    System.setProperty("twitter4j.oauth.consumerKey", consumerKey)
    System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret)
    System.setProperty("twitter4j.oauth.accessToken", accessToken)
    System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret)

    val conf = new SparkConf().setMaster("local[4]").setAppName("TrendingHashTags")
    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))
    ssc.checkpoint("checkpoint")
    val tweets = TwitterUtils.createStream(ssc, None, filters)
    val tweetsFilteredByLang = tweets.filter{tweet => tweet.getLang() == lang}
    val statuses = tweetsFilteredByLang.map{tweet => tweet.getText()}
    val words = statuses.flatMap{status => status.split("""\s+""")}
    val hashTags = words.filter{word => word.startsWith("#")}
    val hashTagPairs = hashTags.map{hashtag => (hashtag, 1)}
    val tagsWithCounts = hashTagPairs.updateStateByKey(
      (counts: Seq[Int], prevCount: Option[Int]) =>
        prevCount.map{c => c + counts.sum}.orElse{Some(counts.sum)}
    )
    val topHashTags = tagsWithCounts.filter { case (t, c) =>
      c > minThreshold.toInt
    }
    val sortedTopHashTags = topHashTags.transform{rdd =>
      rdd.sortBy({case(w, c) => c}, false)
    }
    sortedTopHashTags.print(showCount.toInt)
    ssc.start()
    ssc.awaitTermination()
  }
}

I get the following error stack trace:

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/Logging
    at java.lang.ClassLoader.defineClass1(Native Method)
    at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
    at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
    at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
    at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at org.apache.spark.streaming.twitter.TwitterUtils$.createStream(TwitterUtils.scala:44)
    at TrendingHashTags$.main(TrendingHashTags.scala:28)
    at TrendingHashTags.main(TrendingHashTags.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.Logging
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)

Here are my build.sbt contents:

name := "sparkStreaming"

version := "0.1"

scalaVersion := "2.11.12"

libraryDependencies ++= Seq("org.apache.spark" %% "spark-core" % "2.4.5",
  "org.apache.spark" %% "spark-sql" % "2.4.5",
  "org.apache.spark" %% "spark-streaming" % "2.4.5" % "provided",
  "org.apache.spark" %% "spark-streaming-twitter" % "1.6.3")

1 Answer


The clear indication is that somewhere you are internally pulling in a lower version of Spark. In your case it is visible in the build.sbt itself: spark-streaming-twitter 1.6.3 is compiled against Spark 1.6, which still had the org.apache.spark.Logging trait. That trait was removed from the public API in Spark 2.0 (it moved to an internal package), so it cannot be found at runtime alongside your 2.4.5 artifacts.
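
If you want to confirm that the class really is missing from your classpath, here is a quick sketch you can paste into sbt console (just a diagnostic, not part of the fix):

// Throws ClassNotFoundException on a Spark 2.x classpath: this trait
// only exists in Spark 1.x; Spark 2.0 moved it to an internal package.
Class.forName("org.apache.spark.Logging")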

sbt dependencyTree

You can check with this (dependencyTree is built into sbt 1.4+; on older sbt you need the sbt-dependency-graph plugin). For Maven users, mvn dependency:tree will print the full list of dependencies that are pulled in.
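
For example, to narrow the output down to the Spark artifacts only (the grep filter and the -Dincludes flag are just illustrative conveniences):

# sbt: print the tree, keep only the Spark lines
sbt dependencyTree | grep org.apache.spark

# Maven: restrict the tree to the Spark group
mvn dependency:tree -Dincludes=org.apache.spark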

One more thing: you are using

  "org.apache.spark" %% "spark-streaming" % "2.4.5" % "provided",

Change it to the default Maven scope, compile (i.e. drop % "provided"), and see. With provided, the jar is expected to come from the runtime environment, so it is not on the classpath when you run the app directly from sbt or your IDE.
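
For reference, here is a sketch of a build.sbt where every artifact comes from the same Spark line. Note that after Spark 2.0 the Twitter connector moved out of the main Spark project into Apache Bahir, so the last coordinate below assumes the Bahir artifact (its package name, org.apache.spark.streaming.twitter, is unchanged, so the imports in the question still compile):

name := "sparkStreaming"

version := "0.1"

scalaVersion := "2.11.12"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"      % "2.4.5",
  "org.apache.spark" %% "spark-sql"       % "2.4.5",
  "org.apache.spark" %% "spark-streaming" % "2.4.5",
  // Twitter support for Spark 2.x lives in Apache Bahir
  "org.apache.bahir" %% "spark-streaming-twitter" % "2.4.0"
)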

Similar question and answers here

  • What should I change the dependency to? I'm not sure what you mean by default. Forgive me for my lack of knowledge as I'm new to Scala and Spark. I copied the SBT dependency straight from mvnrepository.com/artifact/org.apache.spark/….
    – Kane
    Commented May 7, 2020 at 23:00
  • I mean the default Maven scope, compile. Also check sbt dependencyTree to find out which old jar is referring to it. Commented May 7, 2020 at 23:02
  • Was the dependency change useful? Commented May 11, 2020 at 22:13
