docker run -itd dingms/ucas-bdms-hw-u64-2019:16.04 /bin/bash
docker ps
docker exec -it <CONTAINER ID> /bin/bash


service ssh start   # 每次重启容器,似乎都要重新开启ssh (ssh apparently has to be started again every time the container is restarted)


--------------------------------------------------------------------------------
                README
--------------------------------------------------------------------------------

PLEASE save your code and data to your drive!
WARNING: this VM will be cleaned without notice after you log out.
         Your code and data on the VM will be lost!!!

## Directory Layout

   * example:  example codes for HDFS and HBase
   * input:    input test data for homework 1


Please enter example, in order to follow the guide.

   $ cd example


## HDFS Usage:

### Start and Stop

   $ start-dfs.sh

   then, run 'jps' to check whether the following processes have been started:

    * NameNode
    * DataNode
    * SecondaryNameNode


   To stop HDFS, run

   $ stop-dfs.sh


### HDFS Command List

   $ hadoop fs

   hdfs directory layout:

   $ hadoop fs -ls /


### Run Example
Description:
  put a file into HDFS by HDFS commands, and then write a Java program to
  read the file from HDFS

1. put file to HDFS

   $ hadoop fs -mkdir /hw1-input
   $ hadoop fs -put README.md /hw1-input
   $ hadoop fs -ls -R /hw1-input

2. write a Java program  @see ./HDFSTest.java

3. compile and run Java program

   $ javac HDFSTest.java

   $ java HDFSTest hdfs://localhost:9000/hw1-input/README.md



## HBase Usage:

### Start and Stop

Start HDFS at first, then HBase.
   $ start-dfs.sh
   $ start-hbase.sh

   then, run 'jps' to check whether the following processes have been started:

   * NameNode
   * DataNode
   * SecondaryNameNode
   * HMaster
   * HRegionServer
   * HQuorumPeer

   To stop HBase and then HDFS, run

   $ stop-hbase.sh
   $ stop-dfs.sh


### Run Example
Description:
   put records into HBase

1. write a Java program  @see ./HBaseTest.java

2. compile and run Java program

   $ javac HBaseTest.java

   $ java HBaseTest

3. check

    $ hbase shell

    hbase(main):001:0> scan 'mytable'
    ROW                                                  COLUMN+CELL
     abc                                                 column=mycf:a, timestamp=1428459927307, value=789
    1 row(s) in 1.8950 seconds

    hbase(main):002:0> disable 'mytable'
    0 row(s) in 1.9050 seconds

    hbase(main):003:0> drop 'mytable'
    0 row(s) in 1.2320 seconds

    hbase(main):004:0> exit

--------------------------------------------------------------------------------
version: 2019-spring




/*2  * Make sure that the classpath contains all the hbase libraries3  *4  * Compile:5  *  javac HBaseTest.java6  *7  * Run: 8  *  java HBaseTest9  */10 11 import java.io.IOException;12 13 import org.apache.hadoop.conf.Configuration;14 import org.apache.hadoop.hbase.HBaseConfiguration;15 import org.apache.hadoop.hbase.HColumnDescriptor;16 import org.apache.hadoop.hbase.HTableDescriptor;17 import org.apache.hadoop.hbase.MasterNotRunningException;18 import org.apache.hadoop.hbase.TableName;19 import org.apache.hadoop.hbase.ZooKeeperConnectionException;20 import org.apache.hadoop.hbase.client.HBaseAdmin;21 import org.apache.hadoop.hbase.client.HTable;22 import org.apache.hadoop.hbase.client.Put;23 24 import org.apache.log4j.*;25 26 public class HBaseTest {27 28   public static void main(String[] args) throws MasterNotRunningException, ZooKeeperConnectionException, IOException {29 30     Logger.getRootLogger().setLevel(Level.WARN);31 32     // create table descriptor33     String tableName= "mytable";34     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));35 36     // create column descriptor37     HColumnDescriptor cf = new HColumnDescriptor("mycf");38     htd.addFamily(cf);39 40     // configure HBase41     Configuration configuration = HBaseConfiguration.create();42     HBaseAdmin hAdmin = new HBaseAdmin(configuration);43 44     if (hAdmin.tableExists(tableName)) {45         System.out.println("Table already exists");46     }47     else {48         hAdmin.createTable(htd);49         System.out.println("table "+tableName+ " created successfully");50     }51     hAdmin.close();52 53     // put "mytable","abc","mycf:a","789"54 55     HTable table = new HTable(configuration,tableName);56     Put put = new Put("abc".getBytes());57     put.add("mycf".getBytes(),"a".getBytes(),"789".getBytes());58     table.put(put);59     table.close();60     System.out.println("put successfully");61   }62 }


1 import java.io.*;2 import java.net.URI;3 import java.net.URISyntaxException;4 5 import org.apache.hadoop.conf.Configuration;6 import org.apache.hadoop.fs.FSDataInputStream;7 import org.apache.hadoop.fs.FSDataOutputStream;8 import org.apache.hadoop.fs.FileSystem;9 import org.apache.hadoop.fs.Path;10 import org.apache.hadoop.io.IOUtils;11 12 /**13  *complie HDFSTest.java14  *15  * javac HDFSTest.java 16  *17  *execute HDFSTest.java18  *19  * java HDFSTest  20  * 21  */22 23 public class HDFSTest {24 25     public static void main(String[] args) throws IOException, URISyntaxException{26         if (args.length <= 0) {27             System.out.println("Usage: HDFSTest <hdfs-file-path>");28             System.exit(1);29         }30 31         String file = args[0];32 33         Configuration conf = new Configuration();34         FileSystem fs = FileSystem.get(URI.create(file), conf);35         Path path = new Path(file);36         FSDataInputStream in_stream = fs.open(path);37 38         BufferedReader in = new BufferedReader(new InputStreamReader(in_stream));39         String s;40         while ((s=in.readLine())!=null) {41              System.out.println(s);42         }43 44         in.close();45 46         fs.close();47     }48 }






import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map.Entry;
import java.util.AbstractMap.SimpleEntry;import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.log4j.*;//建立public类
public class Hw1Grp0 {private static String fileR;    //File R nameprivate static String fileS;    private static int joinR;       // R join keyprivate static int joinS;       private static ArrayList<Integer> resRs = new ArrayList<> (); //R result column number arrayprivate static ArrayList<Integer> resSs = new ArrayList<> (); private static ArrayList<String> resRStrs = new ArrayList<> ();//R column family string arrayprivate static ArrayList<String> resSStrs = new ArrayList<> ();private static boolean isREmpty = true; //Does res contain R's column private static boolean isSEmpty = true;/*** hashmap used for join* the String type key is the join key* the Entry's key is the LinkedList of R's columns that in the res* the Entry's value is the LinkedList of S's columns that in the res*///使用Java内置的hashmap生成哈希表private static HashMap<String, Entry<LinkedList<LinkedList<String>>,LinkedList<LinkedList<String>>>> joinMap = new HashMap<> ();private static HTable table; /*** process the arguments* retracts the file name, join key, and res column from the args into the member variable.* @param input-arguments* @return void*///进行join连接private static void processArgs(String[] args){int index = 0;String resStr;if(args.length!=4){//进行join连接System.out.println("Usage: Hw1Grp0 R=<file-1> S=<file-2> join:R*=S* res=R*,S*");System.exit(1);}//打开文件fileR = args[0].substring(2);fileS = args[1].substring(2);index = args[2].indexOf('=');joinR = Integer.valueOf(args[2].substring(6,index));joinS = Integer.valueOf(args[2].substring(index+2));index = args[3].indexOf(',');resStr = args[3].substring(4);String[] resStrs = resStr.split(",");for(String s : resStrs){System.out.println(s);if(s.startsWith("R")){resRs.add(Integer.valueOf(s.substring(1)));resRStrs.add(s);isREmpty = false;   }else{resSs.add(Integer.valueOf(s.substring(1)));resSStrs.add(s);isSEmpty = false;}}}/*** Read File R and File S from HDFS line by line and Map them into the joinMap.* @param void* @return void*/private static void 
readFileFromHDFS() throws IOException, URISyntaxException{Configuration conf = new Configuration();FileSystem fs = FileSystem.get(conf);Path pathR = new Path(fileR);Path pathS = new Path(fileS);FSDataInputStream in_streamR = fs.open(pathR);FSDataInputStream in_streamS = fs.open(pathS);BufferedReader inR = new BufferedReader(new InputStreamReader(in_streamR));BufferedReader inS = new BufferedReader(new InputStreamReader(in_streamS));String r, s;while((r=inR.readLine())!=null){String[] tmp = r.split("\\|");String joinKey = tmp[joinR];LinkedList<String> joinValues = new LinkedList<String> ();if(joinMap.containsKey(joinKey)){for(int i : resRs){joinValues.add(tmp[i]);}if(isREmpty){joinValues.add("");}joinMap.get(joinKey).getKey().add(joinValues);}else{for(int i : resRs){joinValues.add(tmp[i]);}if(isREmpty){joinValues.add("");}LinkedList<LinkedList<String>> rValues = new LinkedList<> ();LinkedList<LinkedList<String>> sValues = new LinkedList<> ();rValues.add(joinValues);Entry<LinkedList<LinkedList<String>>, LinkedList<LinkedList<String>>> pair = new SimpleEntry<>(rValues, sValues);joinMap.put(joinKey, pair);}}while((s=inS.readLine())!=null){String[] tmp = s.split("\\|");String joinKey = tmp[joinS];LinkedList<String> joinValues = new LinkedList<String> ();if(joinMap.containsKey(joinKey)){for(int i : resSs){joinValues.add(tmp[i]);}if(isSEmpty)joinValues.add("");joinMap.get(joinKey).getValue().add(joinValues);}}inR.close();inS.close();fs.close();}/*** create Result table in HBase* @param: void* @return: void*/private static void createHBaseTable() throws IOException, URISyntaxException{Logger.getRootLogger().setLevel(Level.WARN);// create table descriptorString tableName= "Result";HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));// create column descriptorHColumnDescriptor cf = new HColumnDescriptor("res");htd.addFamily(cf);// configure HBaseConfiguration configuration = HBaseConfiguration.create();HBaseAdmin hAdmin = new 
HBaseAdmin(configuration);if (hAdmin.tableExists(tableName)) {System.out.println("Table already exists");hAdmin.disableTable(tableName);hAdmin.deleteTable(tableName);System.out.println("Table has been deleted");}hAdmin.createTable(htd);System.out.println("table "+tableName+ " created successfully");hAdmin.close();table = new HTable(configuration,tableName);table.setAutoFlush(false);table.setWriteBufferSize(64*1024*1024);}/***  Use the joinMap to decide which record to put into the Result table.* @param: void* @return: void*/private static void hashJoin() throws IOException, URISyntaxException{for(String joinKey : joinMap.keySet()){int count = 0;Entry<LinkedList<LinkedList<String>>, LinkedList<LinkedList<String>>> entry = joinMap.get(joinKey);LinkedList<LinkedList<String>> rValues = entry.getKey();LinkedList<LinkedList<String>> sValues = entry.getValue();if(sValues.size()==0)continue;for(LinkedList<String> rValue : rValues){for(LinkedList<String> sValue : sValues){String countStr = "";if(count != 0){countStr = "." + Integer.toString(count);}if(!isREmpty){for(int i = 0; i < rValue.size(); i++ ){Put put = new Put(joinKey.getBytes());put.add("res".getBytes(), (resRStrs.get(i) + countStr).getBytes(), rValue.get(i).getBytes());table.put(put); }}if(!isSEmpty){for(int i = 0; i < sValue.size(); i++ ){Put put = new Put(joinKey.getBytes());put.add("res".getBytes(), (resSStrs.get(i) + countStr).getBytes(), sValue.get(i).getBytes());table.put(put); }} count++ ;}}}table.flushCommits();table.close();}public static void main (String[] args) throws IOException, URISyntaxException{//进行join的方法processArgs(args);//从hbase中读取的方法readFileFromHDFS();//创建哈希表createHBaseTable();//进行 连接hashJoin(); return;}}






sudo docker cp /home/abc/bigdata/hw1-check/0_202028018629028_hw1.java 9d44ee5dfe3b:/home/bdms
sudo docker cp /home/abc/bigdata/hw1-check-v1.1.tar.gz 9d44:/home/bdms/homework/hw1



//readme.md

0. set language to POSIX

   $ export LC_ALL="POSIX"

1. make sure ssh is running

   $ service ssh status

   if not, then run sshd (note that this is necessary in a docker container)

   $ service ssh start

2. make sure HDFS and HBase are successfully started

   $ start-dfs.sh
   $ start-hbase.sh

   check if hadoop and hbase are running correctly

   $ jps
   5824 Jps
   5029 HMaster
   5190 HRegionServer
   4950 HQuorumPeer
   4507 SecondaryNameNode
   4173 NameNode
   4317 DataNode

3. put input files into HDFS

   $ ./myprepare

4. check file name format

   $ ./check-group.pl <your-java-file>

5. check if the file can be compiled

   $ ./check-compile.pl <your-java-file>

6. run test

   $ ./run-test.pl ./score <your-java-file>

   Your score will be in ./score.  The run-test.pl tests 3 input cases, you will
   get one score for each case.  So the output full score is 3.

   To run the test again, you need to first remove ./score

   $ rm ./score
   $ ./run-test.pl ./score <your-java-file>








