From bc12af422df102bde8ab0cc50d11984ddd7f2105 Mon Sep 17 00:00:00 2001
From: Sebastian Meragelman <arielmeragelman@gmail.com>
Date: Thu, 22 Sep 2022 07:47:44 -0300
Subject: [PATCH 1/2] Create mapper and reducer example files

---
 bigdata/src/C_H_mapper.py  | 19 ++++++++++++++++++
 bigdata/src/C_H_reducer.py | 40 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 bigdata/src/C_H_mapper.py
 create mode 100644 bigdata/src/C_H_reducer.py

diff --git a/bigdata/src/C_H_mapper.py b/bigdata/src/C_H_mapper.py
new file mode 100644
index 0000000..ff4cb50
--- /dev/null
+++ b/bigdata/src/C_H_mapper.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+"""mapper.py"""
+
+import sys
+
+# input comes from STDIN (standard input)
+for line in sys.stdin:
+    # remove leading and trailing whitespace
+    line = line.strip()
+    # split the line into words
+    words = line.split()
+    # increase counters
+    for word in words:
+        # emit one "<word>\t1" record per word on STDOUT (standard output);
+        # what we output here will be the input for the
+        # Reduce step, i.e. the input for reducer.py
+        #
+        # tab-delimited; the trivial word count is 1
+        print ('%s\t%s' % (word, 1))
\ No newline at end of file
diff --git a/bigdata/src/C_H_reducer.py b/bigdata/src/C_H_reducer.py
new file mode 100644
index 0000000..d8a05bb
--- /dev/null
+++ b/bigdata/src/C_H_reducer.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+"""reducer.py"""
+
+from operator import itemgetter
+import sys
+
+current_word = None
+current_count = 0
+word = None
+
+# input comes from STDIN
+for line in sys.stdin:
+    # remove leading and trailing whitespace
+    line = line.strip()
+
+    # parse the input we got from mapper.py
+    word, count = line.split('\t', 1)
+
+    # convert count (currently a string) to int
+    try:
+        count = int(count)
+    except ValueError:
+        # count was not an integer, so skip this
+        # record without reporting it
+        continue
+
+    # same key as the previous record: keep accumulating. This only works
+    # because Hadoop sorts map output by key (here: word) before the reducer
+    if current_word == word:
+        current_count += count
+    else:
+        if current_word:
+            # key changed: write the finished word's total to STDOUT
+            print ('%s\t%s' % (current_word, current_count))
+        current_count = count
+        current_word = word
+
+# do not forget to output the last word if needed!
+if current_word == word:
+    print ('%s\t%s' % (current_word, current_count))
\ No newline at end of file

From 8eac54bfb0ddab10ec8fe2dbe2959437189e9e8a Mon Sep 17 00:00:00 2001
From: Sebastian Meragelman <arielmeragelman@gmail.com>
Date: Thu, 22 Sep 2022 07:55:24 -0300
Subject: [PATCH 2/2] Test that mapper/reducer input and output work

---
 bigdata/src/C_H_mapper.py  |  7 +++++--
 bigdata/src/C_H_reducer.py | 15 ++++++++++++---
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/bigdata/src/C_H_mapper.py b/bigdata/src/C_H_mapper.py
index ff4cb50..8a527d5 100644
--- a/bigdata/src/C_H_mapper.py
+++ b/bigdata/src/C_H_mapper.py
@@ -1,13 +1,15 @@
 #!/usr/bin/env python
 """mapper.py"""
+# TEST FILE TO CHECK HOW THE INPUT/OUTPUT WORKS
 
 import sys
 
 # input comes from STDIN (standard input)
 for line in sys.stdin:
     # remove leading and trailing whitespace
-    line = line.strip()
+    print("passed by mapper {}".format(line.strip()))
     # split the line into words
+'''
     words = line.split()
     # increase counters
     for word in words:
@@ -16,4 +18,5 @@
         # Reduce step, i.e. the input for reducer.py
         #
         # tab-delimited; the trivial word count is 1
-        print ('%s\t%s' % (word, 1))
\ No newline at end of file
+        print ('%s\t%s' % (word, 1))
+'''
\ No newline at end of file
diff --git a/bigdata/src/C_H_reducer.py b/bigdata/src/C_H_reducer.py
index d8a05bb..02d281e 100644
--- a/bigdata/src/C_H_reducer.py
+++ b/bigdata/src/C_H_reducer.py
@@ -1,18 +1,26 @@
 #!/usr/bin/env python
 """reducer.py"""
 
+# TEST FILE TO CHECK HOW THE INPUT/OUTPUT WORKS
+# (echoes every stdin line with its line number; word-count logic is disabled)
+
 from operator import itemgetter
 import sys
 
+'''
 current_word = None
 current_count = 0
 word = None
-
+'''
 # input comes from STDIN
+i = 0
 for line in sys.stdin:
     # remove leading and trailing whitespace
+    i += 1
     line = line.strip()
-
+    print("THIS IS THE LINE NUMBER:{}".format(i))
+    print(line)
+'''
     # parse the input we got from mapper.py
     word, count = line.split('\t', 1)
 
@@ -37,4 +45,5 @@
 
 # do not forget to output the last word if needed!
 if current_word == word:
-    print ('%s\t%s' % (current_word, current_count))
\ No newline at end of file
+    print ('%s\t%s' % (current_word, current_count))
+    '''
\ No newline at end of file