1. Copy the file from the local file system to HDFS

    hdfs dfs -copyFromLocal "C:\Users\labuser\Desktop\MCA54\CountNumberOfSubPatent\assets\patent" /mca54
    

    image.png

  2. Check whether the file is copied to hdfs

    hdfs dfs -ls /mca54
    

    image.png

  3. Open the Hadoop web user interface (NameNode status) and confirm that the file we have just copied is listed.

    image.png

    image.png

    image.png

  4. Now create one Java Project using vs code (CountNumberOfSubPatent)

  5. Copy the jar files from Hadoop (hdfs > common and mapreduce directories) and paste them into the lib folder of the Java project

    image.png

  6. Java file ContNumberOfSubPatents.java (Even assignment (SubPatent by Patent))

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * MapReduce job that counts, for every patent ID, how many input records
     * start with that ID.
     *
     * <p>Each input line is expected to contain exactly two whitespace-separated
     * fields. The first field is parsed as an integer patent ID and used as the
     * grouping key; the job writes one {@code (patentId, count)} pair per key.
     */
    public class ContNumberOfSubPatents {

        /**
         * Emits {@code (patentId, 1)} for every well-formed input line.
         *
         * <p>Blank lines, lines that do not have exactly two fields, and lines
         * whose first field is not an integer are skipped.
         */
        public static class PatentMapper extends Mapper<Object, Text, IntWritable, IntWritable> {
            private final static IntWritable one = new IntWritable(1);
            // Reused for every record to avoid allocating a new writable per line.
            private final IntWritable patentId = new IntWritable();

            /**
             * Parses one input line and emits its patent ID with a count of one.
             *
             * @param key     input key supplied by the framework (unused)
             * @param value   one line of input text
             * @param context sink for the emitted {@code (patentId, 1)} pair
             */
            public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
                String line = value.toString().trim();

                if (line.isEmpty()) {
                    return;
                }

                // Split on runs of whitespace. The Java literal must be "\\s+";
                // "\\\\s+" would match a literal backslash followed by 's' and
                // leave every line unsplit, so nothing would ever be emitted.
                String[] parts = line.split("\\s+");

                if (parts.length == 2) {
                    try {
                        int id = Integer.parseInt(parts[0]);  // patent ID is the first field
                        patentId.set(id);
                        context.write(patentId, one);  // emit (patent ID, 1)
                    } catch (NumberFormatException ignored) {
                        // Deliberately skipped: lines whose patent ID is not an
                        // integer (for example a header row) are not counted.
                    }
                }
            }
        }

        /**
         * Sums the counts received for a key and emits {@code (key, total)}.
         *
         * <p>Input and output types are identical, which is what allows the
         * driver to register this class as the combiner as well as the reducer.
         */
        public static class CountReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
            private final IntWritable result = new IntWritable();

            /**
             * Adds up all values for {@code key} and writes the total.
             *
             * @param key     patent ID
             * @param values  partial counts for this patent ID
             * @param context sink for the emitted {@code (key, total)} pair
             */
            public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable val : values) {
                    sum += val.get();
                }
                result.set(sum);
                context.write(key, result);
            }
        }

        /**
         * Configures and runs the job.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
         * @throws Exception if the job cannot be configured or submitted
         */
        public static void main(String[] args) throws Exception {

            if (args.length != 2) {
                System.err.println("Usage: ContNumberOfSubPatents <input path> <output path>");
                System.exit(-1);
            }

            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "Count Number of Sub-Patents");

            job.setJarByClass(ContNumberOfSubPatents.class);
            job.setMapperClass(PatentMapper.class);
            job.setCombinerClass(CountReducer.class);
            job.setReducerClass(CountReducer.class);

            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            // Exit status reflects job success (0) or failure (1).
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
    
  7. Create output folder inside java project

    1. command to compile ContNumberOfSubPatents.java file

      javac --release 8 -cp "lib/*" -d output "src/ContNumberOfSubPatents.java"
      

      image.png

    2. command to create a jar file for the ContNumberOfSubPatents program

      jar -cvf src/ContNumberOfSubPatents.jar -C output/ .
      

      image.png

  8. command to run hadoop jar

    hadoop jar C:\Users\labuser\Desktop\MCA54\CountNumberOfSubPatent\src\ContNumberOfSubPatents.jar ContNumberOfSubPatents /mca54/patent /mca54/output/ContNumberOfSubPatents
    

    "C:\Users\labuser\Desktop\MCA54\CountNumberOfSubPatent\src\ContNumberOfSubPatents.jar" is the path of the jar file on the local system.

    "/mca54/patent" is the path of the input files present on the hadoop server.

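    "/mca54/output/ContNumberOfSubPatents" is the path of the output folder on the hadoop server where the job writes all of its output files (it is the last argument of the command above). This folder must not already exist when the job starts; otherwise the job fails.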

    image.png

    image.png

  9. Java file ContNumberOfMainPatentsBySubPatent.java (Odd assignment (Patent by SubPatent))

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * MapReduce job that counts, for every sub-patent ID, how many input records
     * reference it.
     *
     * <p>Each input line is expected to contain exactly two whitespace-separated
     * fields: main patent, then sub-patent. The second field is parsed as an
     * integer and used as the grouping key; the job writes one
     * {@code (subPatentId, count)} pair per key.
     */
    public class ContNumberOfMainPatentsBySubPatent {

        /**
         * Emits {@code (subPatentId, 1)} for every well-formed input line.
         *
         * <p>Blank lines, lines that do not have exactly two fields, and lines
         * whose second field is not an integer are skipped.
         */
        public static class SubPatentMapper extends Mapper<Object, Text, IntWritable, IntWritable> {
            private final static IntWritable one = new IntWritable(1);
            // Reused for every record to avoid allocating a new writable per line.
            private final IntWritable subPatentId = new IntWritable();

            /**
             * Parses one input line and emits its sub-patent ID with a count of one.
             *
             * @param key     input key supplied by the framework (unused)
             * @param value   one line of input text
             * @param context sink for the emitted {@code (subPatentId, 1)} pair
             */
            public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
                String line = value.toString().trim();

                if (line.isEmpty()) {
                    return;
                }

                // Split on runs of whitespace. The Java literal must be "\\s+";
                // "\\\\s+" would match a literal backslash followed by 's' and
                // leave every line unsplit, so nothing would ever be emitted.
                String[] parts = line.split("\\s+");

                // Expect two parts: main patent, sub-patent
                if (parts.length == 2) {
                    try {
                        int subId = Integer.parseInt(parts[1]);  // sub-patent ID is the second field
                        subPatentId.set(subId);
                        context.write(subPatentId, one);  // emit (sub-patent ID, 1)
                    } catch (NumberFormatException ignored) {
                        // Deliberately skipped: lines whose sub-patent ID is not
                        // an integer (for example a header row) are not counted.
                    }
                }
            }
        }

        /**
         * Sums the counts received for a key and emits {@code (key, total)}.
         *
         * <p>Input and output types are identical, which is what allows the
         * driver to register this class as the combiner as well as the reducer.
         */
        public static class CountReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
            private final IntWritable result = new IntWritable();

            /**
             * Adds up all values for {@code key} and writes the total.
             *
             * @param key     sub-patent ID
             * @param values  partial counts for this sub-patent ID
             * @param context sink for the emitted {@code (key, total)} pair
             */
            public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable val : values) {
                    sum += val.get();
                }
                result.set(sum);
                context.write(key, result);
            }
        }

        /**
         * Configures and runs the job.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
         * @throws Exception if the job cannot be configured or submitted
         */
        public static void main(String[] args) throws Exception {

            if (args.length != 2) {
                System.err.println("Usage: ContNumberOfMainPatentsBySubPatent <input path> <output path>");
                System.exit(-1);
            }

            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "Count Number of Main Patents by Sub-Patent");

            job.setJarByClass(ContNumberOfMainPatentsBySubPatent.class);
            job.setMapperClass(SubPatentMapper.class);
            job.setCombinerClass(CountReducer.class);
            job.setReducerClass(CountReducer.class);

            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            // Exit status reflects job success (0) or failure (1).
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
    
  10. Create output folder inside java project

    1. command to compile ContNumberOfMainPatentsBySubPatent.java file

      javac --release 8 -cp "lib/*" -d output/ContNumberOfMainPatentsBySubPatent "src/ContNumberOfMainPatentsBySubPatent.java"
      

      image.png

    2. command to create a jar file for the ContNumberOfMainPatentsBySubPatent program

      jar -cvf src/ContNumberOfMainPatentsBySubPatent.jar -C output/ContNumberOfMainPatentsBySubPatent .
      

      image.png

  11. command to run hadoop jar

    hadoop jar C:\Users\labuser\Desktop\MCA54\CountNumberOfSubPatent\src\ContNumberOfMainPatentsBySubPatent.jar ContNumberOfMainPatentsBySubPatent /mca54/patent /mca54/output/ContNumberOfMainPatentsBySubPatent
    

    image.png