Spark 集群配置

slave 上, root@slave2:/opt/spark-2.1.0-bin-hadoop2.6/conf# vim spark-env.sh:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# spark-env.sh — Spark worker environment for slave2.
# HADOOP_HOME is defined once and reused below so the install path
# (/opt/hadoop-2.6.5) appears in exactly one place; every derived value
# expands to the same string as before.
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
export SCALA_HOME=/opt/scala-2.11.8
# NOTE(review): SPARK_MASTER_IP is deprecated in Spark 2.x in favour of
# SPARK_MASTER_HOST; kept as-is because the running cluster uses it.
export SPARK_MASTER_IP=namenode
export HADOOP_HOME=/opt/hadoop-2.6.5
# Per-worker resources offered to the standalone master.
export SPARK_WORKER_CORES=3
export SPARK_WORKER_MEMORY=12g
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
export HADOOP_CONF_LIB_NATIVE_DIR="${HADOOP_HOME}/lib/native"
# The Hadoop component homes all point at the same single-tarball install.
export HADOOP_MAPRED_HOME="${HADOOP_HOME}"
export HADOOP_COMMON_HOME="${HADOOP_HOME}"
export HADOOP_HDFS_HOME="${HADOOP_HOME}"
export YARN_HOME="${HADOOP_HOME}"
export HADOOP_INSTALL="${HADOOP_HOME}"
export YARN_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
export SPARK_HOME=/opt/spark-2.1.0-bin-hadoop2.6
# Hadoop config + jars on the Spark classpath. Quoted so the literal '*'
# glob patterns are preserved verbatim (they are expanded later by the JVM
# classpath handling, not by the shell).
# NOTE(review): SPARK_CLASSPATH is deprecated in Spark 2.x; consider
# spark.{driver,executor}.extraClassPath — value kept identical here.
export SPARK_CLASSPATH="${HADOOP_HOME}/etc/hadoop:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/hdfs:${HADOOP_HOME}/share/hadoop/hdfs/lib/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/yarn/lib/*:${HADOOP_HOME}/share/hadoop/yarn/*:${HADOOP_HOME}/share/hadoop/mapreduce/lib/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/contrib/capacity-scheduler/*.jar"

slave2 上, yarn-site.xml:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->
<!-- Auxiliary service that lets MapReduce jobs shuffle map output through
     the NodeManager. -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<!-- ResourceManager endpoints; "master" must resolve on every node, and the
     ports below must match the master's own yarn-site.xml. -->
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<!-- NOTE(review): the Hadoop default resource-tracker port is 8031; 8035
     here is non-standard — verify it matches the ResourceManager's config. -->
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8035</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<!-- Web UI bound on all interfaces, port 8088. -->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>0.0.0.0:8088</value>
</property>
<!-- Total memory (MB) this NodeManager offers to containers. -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>7168</value>
</property>
<!-- Smallest container the scheduler will allocate (MB). -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<!--<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>4096</value>
</property>
-->
<!-- Physical/virtual memory checks disabled so containers (e.g. Spark
     executors with large JVM overhead) are not killed by the NodeManager. -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>

<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>

/opt/spark-2.1.0-bin-hadoop2.6/conf# vim spark-defaults.conf:

1
2
3
4
5
6
7
8
# Example:
# spark.master spark://master:7077
# spark.eventLog.enabled true
# spark.eventLog.dir hdfs://namenode:8021/directory
# spark.serializer org.apache.spark.serializer.KryoSerializer
# Heap size for the driver JVM (applies when the driver runs on this host).
spark.driver.memory 12g
# FAIR scheduling between jobs within one application (default is FIFO).
spark.scheduler.mode FAIR
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"

/opt/spark-2.1.0-bin-hadoop2.6/conf# vim slaves

1
2
3
4
# Worker hosts, one per line; start-slaves.sh launches a Spark worker on each.
slave1
slave2
slave3
slave4