Copy the file from the local file system to HDFS:
hdfs dfs -copyFromLocal "C:\\Users\\labuser\\Desktop\\MCA54\\WeatherDataProject\\assets\\weatherData.txt" /mca54
Verify that the file was copied to HDFS:
hdfs dfs -ls /mca54
Open the Hadoop web interface (NameNode status page) and confirm that the file we just copied is listed.
Now create a Java project named WeatherDataProject using VS Code.
Copy the JAR files from the Hadoop installation (the hdfs, common, and mapreduce directories) and paste them into the lib folder of the Java project.
Java file TemperatureDayClassifier.java
(Even roll number assignment)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * MapReduce job that tags each date in a whitespace-separated weather data file
 * as a "Hot Day" (maximum temperature above 40) and/or a "Cold Day"
 * (minimum temperature below 10).
 *
 * <p>Usage: {@code TemperatureDayClassifier <input path> <output path>}
 */
public class TemperatureDayClassifier {

    /** Label emitted for a date whose maximum temperature exceeds {@link #HOT_THRESHOLD}. */
    private static final String HOT_DAY = "Hot Day";

    /** Label emitted for a date whose minimum temperature is below {@link #COLD_THRESHOLD}. */
    private static final String COLD_DAY = "Cold Day";

    /** A day is hot when its maximum temperature is strictly greater than this value. */
    private static final double HOT_THRESHOLD = 40;

    /** A day is cold when its minimum temperature is strictly less than this value. */
    private static final double COLD_THRESHOLD = 10;

    /** Counter group used to report records the mapper had to skip. */
    private static final String COUNTER_GROUP = "TemperatureDayClassifier";

    /**
     * Parses one input line and emits {@code (date, label)} pairs.
     *
     * <p>A single record may produce zero, one, or two pairs, because the hot
     * and cold checks are independent of each other.
     */
    public static class TempMapper extends Mapper<Object, Text, Text, Text> {

        /** Minimum number of whitespace-separated fields a valid record must have. */
        private static final int MIN_FIELDS = 9;

        /** Index of the date field (expected as YYYYMMDD). */
        private static final int DATE_FIELD = 1;

        /** Index of the maximum-temperature field. */
        private static final int MAX_TEMP_FIELD = 5;

        /** Index of the minimum-temperature field. */
        private static final int MIN_TEMP_FIELD = 6;

        // Reused across map() calls to avoid allocating a new Text per record.
        private final Text outDate = new Text();
        private final Text outLabel = new Text();

        /**
         * Classifies one line of weather data.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of the input file
         * @param context job context used to emit output pairs and update counters
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString().trim();

            // Skip header or empty lines
            if (line.isEmpty() || line.startsWith("weather")) {
                return;
            }

            // Split on runs of whitespace (tabs or spaces). The regex must be "\\s+":
            // the previous "\\\\s+" matched a literal backslash followed by 's',
            // so no line was ever split and every record was discarded.
            String[] parts = line.split("\\s+");
            if (parts.length < MIN_FIELDS) {
                // Malformed line: skip it, but keep it visible in the job counters.
                context.getCounter(COUNTER_GROUP, "TOO_FEW_FIELDS").increment(1);
                return;
            }

            final double maxTemp;
            final double minTemp;
            try {
                maxTemp = Double.parseDouble(parts[MAX_TEMP_FIELD]);
                minTemp = Double.parseDouble(parts[MIN_TEMP_FIELD]);
            } catch (NumberFormatException ignored) {
                // Best effort: a record with non-numeric temperatures is skipped
                // rather than failing the whole job; it is counted for diagnostics.
                context.getCounter(COUNTER_GROUP, "INVALID_TEMPERATURE").increment(1);
                return;
            }

            outDate.set(formatDate(parts[DATE_FIELD]));

            // The two checks are independent: one record can yield both labels.
            if (maxTemp > HOT_THRESHOLD) {
                outLabel.set(HOT_DAY);
                context.write(outDate, outLabel);
            }
            if (minTemp < COLD_THRESHOLD) {
                outLabel.set(COLD_DAY);
                context.write(outDate, outLabel);
            }
        }

        /**
         * Converts a date from YYYYMMDD to MM-DD-YYYY.
         *
         * @param yyyymmdd the date string taken from the input record
         * @return the reformatted date, or the input unchanged when it is not
         *         exactly eight characters long
         */
        private String formatDate(String yyyymmdd) {
            if (yyyymmdd.length() != 8) {
                return yyyymmdd;
            }
            String year = yyyymmdd.substring(0, 4);
            String month = yyyymmdd.substring(4, 6);
            String day = yyyymmdd.substring(6, 8);
            return month + "-" + day + "-" + year;
        }
    }

    /**
     * Collapses the labels received for one date so that each label is written
     * at most once per date.
     */
    public static class TempReducer extends Reducer<Text, Text, Text, Text> {

        /**
         * Writes "Hot Day" and/or "Cold Day" for the given date.
         *
         * @param key     the formatted date
         * @param values  every label the mappers emitted for this date
         * @param context job context used to emit the final pairs
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // A date can be tagged as Hot Day, Cold Day, or both
            boolean hotDay = false;
            boolean coldDay = false;

            for (Text val : values) {
                String label = val.toString();
                if (HOT_DAY.equals(label)) {
                    hotDay = true;
                } else if (COLD_DAY.equals(label)) {
                    coldDay = true;
                }
            }

            // Output order is fixed: the hot label (if any) precedes the cold label.
            if (hotDay) {
                context.write(key, new Text(HOT_DAY));
            }
            if (coldDay) {
                context.write(key, new Text(COLD_DAY));
            }
        }
    }

    /**
     * Configures and runs the job, then exits the JVM with 0 on success or 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: TemperatureDayClassifier <input path> <output path>");
            System.exit(-1);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Find Hot and Cold Days");

        job.setJarByClass(TemperatureDayClassifier.class);
        job.setMapperClass(TempMapper.class);
        job.setReducerClass(TempReducer.class);

        // Mapper and reducer both emit (Text, Text), so one pair of settings covers both.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Create an output folder inside the Java project.
Command to compile the TemperatureDayClassifier.java file:
javac --release 8 -cp "lib/*" -d output "src/TemperatureDayClassifier.java"
Command to create a JAR file for the TemperatureDayClassifier program:
jar -cvf src/TemperatureDayClassifier.jar -C output/ .
Command to run the JAR with Hadoop:
hadoop jar C:\\Users\\labuser\\Desktop\\MCA54\\WeatherDataProject\\src\\TemperatureDayClassifier.jar TemperatureDayClassifier /mca54/weatherData.txt /mca54/output/TemperatureDayClassifier
"C:....jar"
is the path of the JAR file on the local system.
"/mca54/weatherData.txt"
is the path of the input file on HDFS.
"/mca54/output/TemperatureDayClassifier"
is the HDFS output directory where the job writes its result files. This directory must not already exist, or the job will fail.