hadoop生态搭建(3节点)-10.spark配置
# https://www.scala-lang.org/download/2.12.4.html
# ==================================================================安装 scala
tar -zxvf ~/scala-2.12.4.tgz -C /usr/local
rm -r ~/scala-2.12.4.tgz
# http://archive.apache.org/dist/spark/spark-2.3.0/
# ==================================================================安装 spark
tar -zxf ~/spark-2.3.0-bin-hadoop2.7.tgz -C /usr/local
mv /usr/local/spark-2.3.0-bin-hadoop2.7 /usr/local/spark-2.3.0
rm -r ~/spark-2.3.0-bin-hadoop2.7.tgz
# 环境变量
# ==================================================================node1 node2 node3
vi /etc/profile

# 在 export PATH USER LOGNAME MAIL HOSTNAME HISTSIZE HISTCONTROL 下添加
export JAVA_HOME=/usr/java/jdk1.8.0_111
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.12
export HADOOP_HOME=/usr/local/hadoop/hadoop-2.7.6
export MYSQL_HOME=/usr/local/mysql
export HBASE_HOME=/usr/local/hbase-1.2.4
export HIVE_HOME=/usr/local/hive-2.1.1
export SCALA_HOME=/usr/local/scala-2.12.4
export KAFKA_HOME=/usr/local/kafka_2.12-0.10.2.1
export FLUME_HOME=/usr/local/flume-1.8.0
export SPARK_HOME=/usr/local/spark-2.3.0

export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$MYSQL_HOME/bin:$HBASE_HOME/bin:$HIVE_HOME/bin:$SCALA_HOME/bin:$KAFKA_HOME/bin:$FLUME_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
# ==================================================================node1
# 使环境变量生效
source /etc/profile

# 查看配置结果
echo $SPARK_HOME
# ==================================================================node1
cp $SPARK_HOME/conf/docker.properties.template $SPARK_HOME/conf/docker.properties
vi $SPARK_HOME/conf/docker.properties

spark.mesos.executor.home: /usr/local/spark-2.3.0

cp $SPARK_HOME/conf/fairscheduler.xml.template $SPARK_HOME/conf/fairscheduler.xml
cp $SPARK_HOME/conf/log4j.properties.template $SPARK_HOME/conf/log4j.properties
cp $SPARK_HOME/conf/metrics.properties.template $SPARK_HOME/conf/metrics.properties

cp $SPARK_HOME/conf/slaves.template $SPARK_HOME/conf/slaves
vi $SPARK_HOME/conf/slaves

node1
node2
node3

cp $SPARK_HOME/conf/spark-defaults.conf.template $SPARK_HOME/conf/spark-defaults.conf
vi $SPARK_HOME/conf/spark-defaults.conf

spark.eventLog.enabled true
spark.eventLog.dir hdfs://appcluster/spark/eventslog
# 监控页面需要监控的目录,需要先启用和指定事件日志目录,配合上面两项使用
# 注意:History Server 从该目录读取事件日志,通常应与 spark.eventLog.dir 指向同一目录(待确认)
spark.history.fs.logDirectory hdfs://appcluster/spark
spark.eventLog.compress true

# 如果想 YARN ResourceManager 访问 Spark History Server ,则添加一行:
# spark.yarn.historyServer.address http://node1:19888

cp $SPARK_HOME/conf/spark-env.sh.template $SPARK_HOME/conf/spark-env.sh
vi $SPARK_HOME/conf/spark-env.sh

export SPARK_MASTER_PORT=7077 #提交任务的端口,默认是7077
export SPARK_MASTER_WEBUI_PORT=8070 #master节点的webui端口 默认8080改为8070
export SPARK_WORKER_CORES=1 #每个worker从节点能够支配的core的个数
export SPARK_WORKER_MEMORY=1g #每个worker从节点能够支配的内存数
export SPARK_WORKER_PORT=7078 #每个worker从节点的端口(可选配置)
export SPARK_WORKER_WEBUI_PORT=8071 #每个worker从节点的webui端口(可选配置)
export SPARK_WORKER_INSTANCES=1 #每个worker从节点的实例(可选配置)

export JAVA_HOME=/usr/java/jdk1.8.0_111
export SCALA_HOME=/usr/local/scala-2.12.4
# 注意:此处 HADOOP_HOME 需与 /etc/profile 中配置的路径保持一致,请按实际安装目录填写
export HADOOP_HOME=/usr/local/hadoop-2.7.6
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_PID_DIR=/usr/local/spark-2.3.0/pids
export SPARK_LOCAL_DIRS=/usr/local/spark-2.3.0/tmp
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=node1:2181,node2:2181,node3:2181 -Dspark.deploy.zookeeper.dir=/spark"

vi $SPARK_HOME/sbin/start-master.sh

SPARK_MASTER_WEBUI_PORT=8070

cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml $SPARK_HOME/conf/

vi $HADOOP_HOME/etc/hadoop/log4j.properties

log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

scp -r $HADOOP_HOME/etc/hadoop/log4j.properties node2:$HADOOP_HOME/etc/hadoop/
scp -r $HADOOP_HOME/etc/hadoop/log4j.properties node3:$HADOOP_HOME/etc/hadoop/
# ==================================================================node1
scp -r $SPARK_HOME node2:/usr/local/
scp -r $SPARK_HOME node3:/usr/local/
# ==================================================================node2 node3
# 使环境变量生效
source /etc/profile

# 查看配置结果
echo $SPARK_HOME
# 启动
# ==================================================================node1 node2 node3
# 先启动zookeeper 和 hdfs
zkServer.sh start
zkServer.sh status

# ==================================================================node1
zkCli.sh
create /spark ''

$HADOOP_HOME/sbin/start-all.sh

$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc

# ==================================================================node2
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
# 启动spark
# ==================================================================node1
$SPARK_HOME/sbin/start-master.sh

$SPARK_HOME/sbin/start-slaves.sh

# ==================================================================node2
$SPARK_HOME/sbin/start-master.sh

# ==================================================================node1
# 获取安全模式的状态:
hdfs dfsadmin -safemode get

# 安全模式打开
# hdfs dfsadmin -safemode enter

# 安全模式关闭
# hdfs dfsadmin -safemode leave

hdfs dfs -mkdir -p /spark/eventslog

$SPARK_HOME/bin/spark-shell

# http://node1:4040
# http://node1:8070

> :quit
# test
# 需保证hdfs上该目录不存在
# hdfs dfs -mkdir -p /spark/output
# hdfs dfs -rm -r /spark/output

vi ~/sparkdata.txt

hello man
what are you doing now
my running
hello kevin
hi man

hdfs dfs -mkdir -p /usr/file/input

hdfs dfs -put ~/sparkdata.txt /usr/file/input
hdfs dfs -ls /usr/file/input

# 以下 scala 语句需在 spark-shell 中执行
val file1 = sc.textFile("file:///root/sparkdata.txt")
val count1=file1.flatMap(line => line.split(" ")).map(word => (word,1)).reduceByKey(_+_)
count1.saveAsTextFile("hdfs://node1:8020/spark/output1")

val file=sc.textFile("hdfs://appcluster/usr/file/input/sparkdata.txt")
val count=file.flatMap(line => line.split(" ")).map(word => (word,1)).reduceByKey(_+_)
count.saveAsTextFile("hdfs://node1:8020/spark/output")

hdfs dfs -ls /spark/output

hdfs dfs -cat /spark/output/part-00000
# stop已经启动的进程
# ==================================================================node1
$SPARK_HOME/sbin/stop-slaves.sh

$SPARK_HOME/sbin/stop-master.sh

$HADOOP_HOME/sbin/stop-all.sh

# ==================================================================node1 node2 node3
# 停止 zookeeper
zkServer.sh stop

# ==================================================================node2
$HADOOP_HOME/sbin/yarn-daemon.sh stop resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc

# ==================================================================node1
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc

shutdown -h now
# 快照 spark
转载于:https://www.cnblogs.com/zcf5522/p/9775651.html
hadoop生态搭建(3节点)-10.spark配置相关推荐
- hadoop生态搭建(3节点)
软件:CentOS-7 VMware12 SSHSecureShellClient shell工具:Xshell 规划 vm网络配置 01.基础配置 02.ssh配置 03.zookeep ...
- hadoop生态搭建(3节点)-06.hbase配置
# http://archive.apache.org/dist/hbase/1.2.4/ # ==================================================== ...
- hadoop生态搭建(3节点)-07.hive配置
# http://archive.apache.org/dist/hive/hive-2.1.1/ # ================================================ ...
- hadoop生态搭建(3节点)-05.mysql配置_单节点
# ==================================================================node1 # ======================== ...
- 大数据集群搭建之节点的网络配置过程(二)
紧接着上一章来设置windows的vmnet8的ip地址和虚拟机中centos的ip地址. NAT虚拟网络的配置图如下图所示: 1.这里根据VMware中得到的网关地址去设置vmnet8的ip地址. ...
- Hadoop生态Flume(二)安装配置
一.flume下载地址 列中的链接应显示可用镜像的列表,并根据您的推断位置进行默认选择.如果看不到该页面,请尝试使用其他浏览器.校验和和签名是主分发服务器上原始文件的链接. Apache Flume二 ...
- centos7下搭建hadoop、hbase、hive、spark分布式系统架构
全栈工程师开发手册 (作者:栾鹏) 架构系列文章 如果想了解架构原理,可以参考:https://blog.csdn.net/luanpeng825485697/article/details/8031 ...
- 小知识点:ARM 架构 Linux 大数据集群基础环境搭建(Hadoop、MySQL、Hive、Spark、Flink、ZK、Kafka、Nginx、Node)
换了 M2 芯片的 Mac,以前 x86 版本的 Linux 大数据集群基础环境搭建在 ARM 架构的虚拟机集群上有些用不了了,现在重新写一份基于 ARM 架构的,少数不兼容之外其他都差不多,相当 ...
- Spark+Hadoop环境搭建
一.工具下载: 1.spark下载 目前最新的是2.1.1,spark 2.0开始api和之前的还是有比较多的变化,因此如果选择2.0以上版本,最好看一下api变化,下载地址:http://spark ...
最新文章
- 2014/3/16 长沙多校(第三次)
- 如何面向用户价值编写敏捷开发用户故事
- 经典FOXMAIL报错 winsock error 11004
- 玩吧高速增长的数据上云实践
- CSS3详解:background
- linux下字体怎么安装方法,linux安装字体方法
- 疫情加速中国服务器采购 数字化转型成为增长“新引擎”
- [SQL Server]关于15517号错误的一点想法
- 25muduo_net库源码分析(一)
- ubuntu gedit 工具菜单下没有 Manage external tools
- Vue中异步组件(结合webpack,转载)
- Javascript Python 翻译 excel trend 函数
- 图论算法真的那么难吗?知识点都在这了……
- Python常用标准库、模块
- Hadoop学习 第4-6章 Hadoop数据压缩、Yarn和企业优化
- 计算机网络技术广告,屏蔽QQ广告和迷你首页广告
- OA系统流程效率改进方案
- 顺丰路由查询(最新)
- 静态HTML网页设计作品——仿2018淘宝首页(1页) HTML+CSS+JavaScript 学生DW网页设计作业成品 web课程设计网页规划与设计 计算机毕设网页设计源码
- linux shell正则表达式如何匹配域名(包含中文域名)
热门文章
- Java中number数字类型的转换_Java下数字类型的转换 (转)
- Linux学习:第四章-vi编辑器
- mysql_real_escape_string 报错_addslashes与mysql_real_escape_string的区别
- C++:12---运算符重载
- chrome 使用gpu 加速_一招解决 Chrome / Edge 卡顿缓慢 让浏览器重回流畅顺滑
- C++primer 第 3 章 字符串、向量和数组 3 . 3 标准库类型vector
- 英语口语-文章朗读Week10 Thursday
- 高速行车12条技巧,每一条都关乎你的生命
- 长寿的十个秘诀 至少选择一个坚持实施
- 水滴石穿C语言之指针、数组和函数