Move the file from local to hdfs
hdfs dfs -copyFromLocal "C:\\Users\\labuser\\Desktop\\MCA54\\CountNumberOfSubPatent\\assets\\patent" /mca54
Check whether the file is copied to hdfs
hdfs dfs -ls /mca54
Open the Hadoop web user interface (NameNode status page) and confirm that the file we have just copied is listed there.
Now create a Java project in VS Code (CountNumberOfSubPatent).
Copy the jar files from the Hadoop installation (the hdfs, common and mapreduce directories) and paste them into the lib folder of the Java project.
Java file ContNumberOfSubPatents.java (Even assignment: SubPatent by Patent)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class ContNumberOfSubPatents {
// Mapper Class
public static class PatentMapper extends Mapper<Object, Text, IntWritable, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private IntWritable patentId = new IntWritable();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString().trim();
if (line.isEmpty()) return;
String[] parts = line.split("\\\\s+");
if (parts.length == 2) {
try {
int id = Integer.parseInt(parts[0]); // convert patent ID string to int
patentId.set(id);
context.write(patentId, one); // emit (int patent ID, 1)
} catch (NumberFormatException e) {
// Ignore lines where patent ID is not an integer
}
}
}
}
// Reducer Class
public static class CountReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
// Main Method (Driver)
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: ContNumberOfSubPatents <input path> <output path>");
System.exit(-1);
}
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "Count Number of Sub-Patents");
job.setJarByClass(ContNumberOfSubPatents.class);
job.setMapperClass(PatentMapper.class);
job.setCombinerClass(CountReducer.class);
job.setReducerClass(CountReducer.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
Create an output folder inside the Java project.
Command to compile the ContNumberOfSubPatents.java file:
javac --release 8 -cp "lib/*" -d output "src/ContNumberOfSubPatents.java"
command to create a jar file for the ContNumberOfSubPatents program
jar -cvf src/ContNumberOfSubPatents.jar -C output/ .
command to run hadoop jar
hadoop jar C:\\Users\\labuser\\Desktop\\MCA54\\CountNumberOfSubPatent\\src\\ContNumberOfSubPatents.jar ContNumberOfSubPatents /mca54/patent /mca54/output/ContNumberOfSubPatents
"C:\\Users\\labuser\\Desktop\\MCA54\\CountNumberOfSubPatent\\src\\ContNumberOfSubPatents.jar"
is the path of the jar file on the local system.
"/mca54/patent"
is the path of the input file in HDFS.
"/mca54/output/ContNumberOfSubPatents"
is the HDFS path of the output folder where the job writes its result files; it must match the last argument of the command above, and Hadoop requires that this folder does not exist before the job runs.
Java file ContNumberOfMainPatentsBySubPatent.java (Odd assignment: Patent by SubPatent)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class ContNumberOfMainPatentsBySubPatent {
// Mapper Class
public static class SubPatentMapper extends Mapper<Object, Text, IntWritable, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private IntWritable subPatentId = new IntWritable();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString().trim();
if (line.isEmpty()) return;
String[] parts = line.split("\\\\s+");
// Expect two parts: main patent, sub-patent
if (parts.length == 2) {
try {
int subId = Integer.parseInt(parts[1]); // sub-patent ID is second part
subPatentId.set(subId);
context.write(subPatentId, one); // emit (sub-patent ID, 1)
} catch (NumberFormatException e) {
// Ignore lines where sub-patent ID is not integer
}
}
}
}
// Reducer Class
public static class CountReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
// Main Method (Driver)
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: ContNumberOfMainPatentsBySubPatent <input path> <output path>");
System.exit(-1);
}
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "Count Number of Main Patents by Sub-Patent");
job.setJarByClass(ContNumberOfMainPatentsBySubPatent.class);
job.setMapperClass(SubPatentMapper.class);
job.setCombinerClass(CountReducer.class);
job.setReducerClass(CountReducer.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
Create the output folder inside the Java project (if it does not already exist).
Command to compile the ContNumberOfMainPatentsBySubPatent.java file:
javac --release 8 -cp "lib/*" -d output/ContNumberOfMainPatentsBySubPatent "src/ContNumberOfMainPatentsBySubPatent.java"
command to create a jar file for the ContNumberOfMainPatentsBySubPatent program
jar -cvf src/ContNumberOfMainPatentsBySubPatent.jar -C output/ContNumberOfMainPatentsBySubPatent .
command to run hadoop jar
hadoop jar C:\\Users\\labuser\\Desktop\\MCA54\\CountNumberOfSubPatent\\src\\ContNumberOfMainPatentsBySubPatent.jar ContNumberOfMainPatentsBySubPatent /mca54/patent /mca54/output/ContNumberOfMainPatentsBySubPatent