1. Copy the file from the local file system to HDFS

    weatherData.txt

    hdfs dfs -copyFromLocal "C:\\Users\\labuser\\Desktop\\MCA54\\WeatherDataProject\\assets\\weatherData.txt" /mca54
    

    image.png

    image.png

  2. Check whether the file is copied to hdfs

    hdfs dfs -ls /mca54
    
  3. Open the Hadoop user interface (NameNode status page) and check that the file we just copied is listed.

  4. Now create a Java project named WeatherDataProject using VS Code

  5. Copy the jar files from Hadoop (hdfs > common and mapreduce directories) and paste them into the lib folder of the Java project

    image.png

  6. Java file TemperatureDayClassifier.java (Even roll number assignment)

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * MapReduce job that tags dates from a whitespace-separated weather data file.
     *
     * <p>A date is tagged "Hot Day" when its maximum temperature is above 40 and
     * "Cold Day" when its minimum temperature is below 10. A single date may
     * receive both tags.
     *
     * <p>Usage: {@code TemperatureDayClassifier <input path> <output path>}
     */
    public class TemperatureDayClassifier {

        /** Label written for dates whose maximum temperature exceeds {@link #HOT_THRESHOLD}. */
        private static final String HOT_DAY = "Hot Day";

        /** Label written for dates whose minimum temperature is below {@link #COLD_THRESHOLD}. */
        private static final String COLD_DAY = "Cold Day";

        /** A day is hot when its maximum temperature is strictly greater than this value. */
        private static final double HOT_THRESHOLD = 40;

        /** A day is cold when its minimum temperature is strictly less than this value. */
        private static final double COLD_THRESHOLD = 10;

        /** Lines with fewer whitespace-separated fields than this are treated as malformed. */
        private static final int MIN_FIELDS = 9;

        /**
         * Emits {@code (date, label)} pairs for every input line that qualifies as a
         * hot and/or cold day. Header, empty and malformed lines are skipped.
         */
        public static class TempMapper extends Mapper<Object, Text, Text, Text> {

            // Reused across map() calls to avoid allocating a new Text per record.
            private final Text outDate = new Text();
            private final Text outLabel = new Text();

            /**
             * Parses one line of weather data and writes a label for the line's date.
             *
             * @param key     input key supplied by the input format (unused)
             * @param value   one line of the input file
             * @param context sink for the emitted {@code (date, label)} pairs
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
                String line = value.toString().trim();

                // Skip header or empty lines
                if (line.isEmpty() || line.startsWith("weather")) {
                    return;
                }

                // Split on runs of whitespace (tabs or spaces). In Java source the
                // regex \s+ must be written "\\s+"; the previous "\\\\s+" matched a
                // literal backslash followed by 's', so no line was ever split and
                // every record was discarded as malformed.
                String[] parts = line.split("\\s+");
                if (parts.length < MIN_FIELDS) {
                    return; // skip malformed lines
                }

                try {
                    String date = parts[1]; // date column (YYYYMMDD)
                    double maxTemp = Double.parseDouble(parts[5]);
                    double minTemp = Double.parseDouble(parts[6]);

                    outDate.set(formatDate(date));

                    // The two checks are independent: a date can be both hot and cold.
                    if (maxTemp > HOT_THRESHOLD) {
                        outLabel.set(HOT_DAY);
                        context.write(outDate, outLabel);
                    }
                    if (minTemp < COLD_THRESHOLD) {
                        outLabel.set(COLD_DAY);
                        context.write(outDate, outLabel);
                    }
                } catch (NumberFormatException ignored) {
                    // Deliberate best-effort: a line whose temperature fields are not
                    // numeric is dropped rather than failing the whole job.
                }
            }

            /**
             * Converts a date from {@code YYYYMMDD} to {@code MM-DD-YYYY}.
             *
             * @param yyyymmdd the date string to convert
             * @return the reformatted date, or the input unchanged when it is not
             *         exactly eight characters long
             */
            private String formatDate(String yyyymmdd) {
                if (yyyymmdd.length() != 8) {
                    return yyyymmdd;
                }
                String year = yyyymmdd.substring(0, 4);
                String month = yyyymmdd.substring(4, 6);
                String day = yyyymmdd.substring(6, 8);
                return month + "-" + day + "-" + year;
            }
        }

        /**
         * Collapses the labels emitted for one date so that each distinct label is
         * written at most once per date.
         */
        public static class TempReducer extends Reducer<Text, Text, Text, Text> {

            // Reused output value to avoid allocating a new Text per write.
            private final Text outLabel = new Text();

            /**
             * Writes "Hot Day" and/or "Cold Day" for the given date, depending on
             * which labels appear among the values.
             *
             * @param key     the date
             * @param values  the labels emitted by the mapper for this date
             * @param context sink for the de-duplicated {@code (date, label)} pairs
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            public void reduce(Text key, Iterable<Text> values, Context context)
                    throws IOException, InterruptedException {

                // A date can be tagged as Hot Day, Cold Day, or both
                boolean hotDay = false;
                boolean coldDay = false;

                for (Text val : values) {
                    String label = val.toString();
                    if (HOT_DAY.equals(label)) {
                        hotDay = true;
                    } else if (COLD_DAY.equals(label)) {
                        coldDay = true;
                    }
                }

                if (hotDay) {
                    outLabel.set(HOT_DAY);
                    context.write(key, outLabel);
                }
                if (coldDay) {
                    outLabel.set(COLD_DAY);
                    context.write(key, outLabel);
                }
            }
        }

        /**
         * Configures and runs the job, then exits with 0 on success and 1 on failure.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} the output path
         * @throws Exception if the job cannot be configured or submitted
         */
        public static void main(String[] args) throws Exception {

            if (args.length != 2) {
                System.err.println("Usage: TemperatureDayClassifier <input path> <output path>");
                System.exit(-1);
            }

            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "Find Hot and Cold Days");

            job.setJarByClass(TemperatureDayClassifier.class);
            job.setMapperClass(TempMapper.class);
            job.setReducerClass(TempReducer.class);

            // Mapper and reducer both emit (Text, Text), so one pair of settings covers both.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
    
  7. Create output folder inside java project

    1. command to compile TemperatureDayClassifier.java file

      javac --release 8 -cp "lib/*" -d output "src/TemperatureDayClassifier.java"
      

      image.png

    2. command to create a jar file for the TemperatureDayClassifier program

      jar -cvf src/TemperatureDayClassifier.jar -C output/ .
      

      image.png

  8. command to run hadoop jar

    hadoop jar C:\\Users\\labuser\\Desktop\\MCA54\\WeatherDataProject\\src\\TemperatureDayClassifier.jar TemperatureDayClassifier /mca54/weatherData.txt /mca54/output/TemperatureDayClassifier
    

    "C:....jar" is the path of the jar file present in the local system

    "/mca54/weatherData.txt" is the path of the input file on the Hadoop server (HDFS).

    "/mca54/output/TemperatureDayClassifier" is the path of the output folder on the Hadoop server where I want all the output files to be written.

    image.png

    image.png