Reference notes for upgrading Spark to 3.3.4 on CDH 6.3.2.

Download Scala 2.12.15, Maven 3.8.6, and the Spark 3.3.4 source:
wget http://distfiles.macports.org/scala2.12/scala-2.12.15.tgz
wget https://archive.apache.org/dist/maven/maven-3/3.8.6/binaries/apache-maven-3.8.6-bin.tar.gz
wget https://archive.apache.org/dist/spark/spark-3.3.4/spark-3.3.4.tgz
tar -zxvf scala-2.12.15.tgz -C /opt/
tar -zxvf apache-maven-3.8.6-bin.tar.gz -C /opt/
tar -zxvf spark-3.3.4.tgz -C /opt/
tar -zxvf scala-2.12.15.tgz -C /opt/spark-3.3.4/build/
tar -zxvf apache-maven-3.8.6-bin.tar.gz -C /opt/spark-3.3.4/build/
Set up the build environment (paths match the installs above):
export JAVA_HOME=/usr/java/jdk1.8.0_361-amd64
export HADOOP_CONF_DIR=/etc/hadoop/conf
export HADOOP_HOME=/opt/cloudera/parcels/CDH/lib/hadoop
export HADOOP_CLASSPATH=`hadoop classpath`
export MAVEN_HOME=/opt/apache-maven-3.8.6
export SCALA_HOME=/opt/scala-2.12.15
export PATH=$JAVA_HOME/bin:$PATH:$SCALA_HOME/bin:$HADOOP_CONF_DIR:$HADOOP_HOME:$MAVEN_HOME/bin
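A quick sanity check that the toolchain resolves to the versions installed above (adjust expectations if yours differ):
java -version    # expect 1.8.0_361
scala -version   # expect 2.12.15
mvn -version     # expect 3.8.6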
Add the Aliyun and Cloudera repositories so the CDH Hadoop artifacts resolve (into the <repositories> section of /opt/spark-3.3.4/pom.xml, or your Maven settings.xml):
<repository>
  <id>aliyun</id>
  <url>https://maven.aliyun.com/nexus/content/groups/public</url>
  <releases>
    <enabled>true</enabled>
  </releases>
  <snapshots>
    <enabled>false</enabled>
  </snapshots>
</repository>
<repository>
  <id>cloudera</id>
  <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
  <releases>
    <enabled>true</enabled>
  </releases>
  <snapshots>
    <enabled>false</enabled>
  </snapshots>
</repository>
Still in pom.xml, switch the hadoop.version property to the CDH build:
<!-- <hadoop.version>3.3.2</hadoop.version> -->
<hadoop.version>3.0.0-cdh6.3.2</hadoop.version>
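A minimal check that both pom.xml edits took effect (line numbers will vary by Spark version):
grep -n 'cdh6.3.2\|cloudera' /opt/spark-3.3.4/pom.xml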
Edit the build script, raising Maven's memory limits and pointing MVN at the Maven installed above:
vim /opt/spark-3.3.4/dev/make-distribution.sh
export MAVEN_OPTS="-Xmx8g -XX:ReservedCodeCacheSize=2g"
MVN="/opt/apache-maven-3.8.6/bin/mvn"
Switch the source tree to Scala 2.12 and start the build:
cd /opt/spark-3.3.4
ls -l ./dev/change-scala-version.sh
./dev/change-scala-version.sh 2.12
./dev/make-distribution.sh --name 3.0.0-cdh6.3.2 --tgz -Pyarn -Phadoop-3.0 -Phive -Phive-thriftserver -Dhadoop.version=3.0.0-cdh6.3.2 -X
--tgz: package the distribution as a .tgz archive
--name: the suffix appended to the generated tarball name (here, the Hadoop version it was built against)
-Pyarn: enable the YARN profile
-Dhadoop.version=3.0.0-cdh6.3.2: build against this exact Hadoop version
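If make-distribution.sh fails partway through, one optional shortcut (not part of the original steps) is to iterate with Spark's bundled Maven wrapper using the same profiles, skipping tests, then re-run the distribution script once the build is clean:
cd /opt/spark-3.3.4
./build/mvn -Pyarn -Phadoop-3.0 -Phive -Phive-thriftserver -Dhadoop.version=3.0.0-cdh6.3.2 -DskipTests clean package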
ll /opt/spark-3.3.4/spark-3.3.4-bin-3.0.0-cdh6.3.2.tgz
-rw-r--r-- 1 root root 266773599 Nov 20 18:08 /opt/spark-3.3.4/spark-3.3.4-bin-3.0.0-cdh6.3.2.tgz
Unpack the finished build into the CDH parcel tree and rename it spark3:
tar -zxvf spark-3.3.4-bin-3.0.0-cdh6.3.2.tgz -C /opt/cloudera/parcels/CDH/lib
cd /opt/cloudera/parcels/CDH/lib
mv spark-3.3.4-bin-3.0.0-cdh6.3.2 spark3
cp /etc/spark/conf/spark-env.sh /opt/cloudera/parcels/CDH/lib/spark3/conf
chmod +x /opt/cloudera/parcels/CDH/lib/spark3/conf/spark-env.sh
vim /opt/cloudera/parcels/CDH/lib/spark3/conf/spark-env.sh
In it, point SPARK_HOME at the new install:
export SPARK_HOME=/opt/cloudera/parcels/CDH/lib/spark3
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}
cp /etc/hive/conf/hive-site.xml /opt/cloudera/parcels/CDH/lib/spark3/conf/
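At this point the install can be sanity-checked locally; this prints the version banner and exits without touching YARN:
/opt/cloudera/parcels/CDH/lib/spark3/bin/spark-submit --version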
Create a spark-sql wrapper (CDH 6 does not ship a spark-sql CLI; this one launches the Spark 3 Thrift CLI driver):
vim /opt/cloudera/parcels/CDH/bin/spark-sql
export HADOOP_CONF_DIR=/etc/hadoop/conf
export YARN_CONF_DIR=/etc/hadoop/conf
SOURCE="${BASH_SOURCE[0]}"
BIN_DIR="$( dirname "$SOURCE" )"
while [ -h "$SOURCE" ]; do
  SOURCE="$(readlink "$SOURCE")"
  [[ $SOURCE != /* ]] && SOURCE="$BIN_DIR/$SOURCE"
  BIN_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
done
BIN_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
LIB_DIR=$BIN_DIR/../lib
export HADOOP_HOME=$LIB_DIR/hadoop
. $LIB_DIR/bigtop-utils/bigtop-detect-javahome
exec $LIB_DIR/spark3/bin/spark-submit --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver "$@"
chmod +x /opt/cloudera/parcels/CDH/bin/spark-sql
alternatives --install /usr/bin/spark-sql spark-sql /opt/cloudera/parcels/CDH/bin/spark-sql 1
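A quick smoke test for the wrapper (the query is an arbitrary example; it needs a reachable Hive metastore):
spark-sql -e "show databases;"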
cd /opt/cloudera/parcels/CDH/lib/spark3/conf
mv log4j2.properties.template log4j2.properties
cp /opt/cloudera/parcels/CDH/lib/spark/conf/spark-defaults.conf ./
vim /opt/cloudera/parcels/CDH/lib/spark3/conf/spark-defaults.conf
Remove the Cloudera-specific spark.extraListeners and spark.sql.queryExecutionListeners entries (their Navigator lineage classes are not on the Spark 3 classpath), and set spark.yarn.jars:
spark.yarn.jars=hdfs://ns1/user/spark/spark3/jars/*
hadoop fs -mkdir -p /user/spark/spark3/jars
cd /opt/cloudera/parcels/CDH/lib/spark3/jars
hadoop fs -put *.jar /user/spark/spark3/jars
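Optionally confirm the upload (jar names vary with the build):
hadoop fs -ls /user/spark/spark3/jars | head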
Create a spark3-submit wrapper the same way:
vim /opt/cloudera/parcels/CDH/bin/spark3-submit
export HADOOP_CONF_DIR=/etc/hadoop/conf
export YARN_CONF_DIR=/etc/hadoop/conf
SOURCE="${BASH_SOURCE[0]}"
BIN_DIR="$( dirname "$SOURCE" )"
while [ -h "$SOURCE" ]; do
  SOURCE="$(readlink "$SOURCE")"
  [[ $SOURCE != /* ]] && SOURCE="$BIN_DIR/$SOURCE"
  BIN_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
done
BIN_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
LIB_DIR=/opt/cloudera/parcels/CDH/lib
export HADOOP_HOME=$LIB_DIR/hadoop
. $LIB_DIR/bigtop-utils/bigtop-detect-javahome
export PYTHONHASHSEED=0
exec $LIB_DIR/spark3/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
chmod +x /opt/cloudera/parcels/CDH/bin/spark3-submit
alternatives --install /usr/bin/spark3-submit spark3-submit /opt/cloudera/parcels/CDH/bin/spark3-submit 1
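And a quick check that the alternatives entry resolves to Spark 3.3.4:
spark3-submit --version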
Distribute the install to the remaining nodes (run from the parent directory so the scp source path resolves):
cd /opt/cloudera/parcels/CDH/lib
scp -r spark3 kube-39:/opt/cloudera/parcels/CDH/lib/
scp -r spark3 kube-40:/opt/cloudera/parcels/CDH/lib/
scp -r spark3 kube-41:/opt/cloudera/parcels/CDH/lib/
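Equivalently, a loop keeps this manageable on larger clusters (host list as above):
for h in kube-39 kube-40 kube-41; do scp -r spark3 $h:/opt/cloudera/parcels/CDH/lib/; done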
Smoke-test the full stack on YARN with the bundled SparkPi example:
spark3-submit --conf "spark.default.parallelism=100" --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode cluster --driver-memory 8g --executor-memory 4g --executor-cores 4 --num-executors 3 --queue root.default /opt/cloudera/parcels/CDH/lib/spark3/examples/jars/spark-examples*.jar 1000
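In cluster deploy mode the result lands in the driver's container log; it can be pulled back with yarn logs (the application ID below is a placeholder):
yarn logs -applicationId application_XXXXXXXXXXXXX_XXXX | grep 'Pi is roughly'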
On clusters running CDH's Spark 2 external shuffle service, shuffle-heavy stages may then fail like this:
FetchFailed(BlockManagerId(2, n6, 7337, None), shuffleId=57, mapId=136, reduceId=0, message=
org.apache.spark.shuffle.FetchFailedException
at org.apache.spark.errors.SparkCoreErrors$.fetchFailedError(SparkCoreErrors.scala:312)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:1169)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:904)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:85)
at org.apache.spark.util.CompletionIterator.next(CompletionIterator.scala:29)
.......
Caused by: java.lang.RuntimeException: java.lang.IllegalArgumentException: Unknown message type: 9
at org.apache.spark.network.shuffle.protocol.BlockTransferMessage$Decoder.fromByteBuffer(BlockTransferMessage.java:71)
at org.apache.spark.network.shuffle.ExternalShuffleBlockHandler.receive(ExternalShuffleBlockHandler.java:81)
at org.apache.spark.network.server.TransportRequestHandler.processRpcRequest(TransportRequestHandler.java:150)
......
Cause: Spark 3 introduced a new shuffle fetch protocol that the Spark 2 external shuffle service shipped with CDH does not recognize (hence "Unknown message type: 9"). Force Spark 3 back to the old protocol in spark-defaults.conf:
spark.shuffle.useOldFetchProtocol=true
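The same setting can also be passed per job instead of globally, e.g.:
spark3-submit --conf spark.shuffle.useOldFetchProtocol=true --class org.apache.spark.examples.SparkPi --master yarn /opt/cloudera/parcels/CDH/lib/spark3/examples/jars/spark-examples*.jar 100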