diff --git a/clean.sh b/clean.sh
new file mode 100644
index 0000000..41e5c7c
--- /dev/null
+++ b/clean.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# clean.sh - prepare the training and test CSV files.
+#
+# Steps:
+#   1. Copy the test set to original_testing.csv.
+#   2. Run dos2unix.py on all three files, passing each path as both
+#      of its arguments.
+#   3. Replace tokens with numbers: female -> -1 and male -> 1 in the
+#      training and test sets; TRUE -> 1 and FALSE -> 2 in the training set.
+#
+# Run from the directory that holds the CSV files and dos2unix.py.
+
+# Stop at the first failing command or unset variable.
+set -eu
+
+TRAINING='training_data.csv'
+TESTING='test_data.csv'
+ORIGINAL_TESTING='original_testing.csv'
+
+cp -- "$TESTING" "$ORIGINAL_TESTING"
+
+for csv in "$ORIGINAL_TESTING" "$TRAINING" "$TESTING"; do
+  python dos2unix.py "$csv" "$csv"
+done
+
+# No g flag: each expression rewrites only the first match on a line, so
+# female/male are listed twice to cover two occurrences per line. female
+# goes first because "male" is a substring of "female".
+# The attached -i.bak suffix form is accepted by both GNU and BSD sed.
+sed -i.bak \
+  -e 's/female/-1/' -e 's/female/-1/' \
+  -e 's/male/1/' -e 's/male/1/' \
+  -e 's/TRUE/1/' -e 's/FALSE/2/' \
+  "$TRAINING"
+
+sed -i.bak \
+  -e 's/female/-1/' -e 's/female/-1/' \
+  -e 's/male/1/' -e 's/male/1/' \
+  "$TESTING"
+
+# Delete only the backups created above, not every *.bak in the directory.
+rm -f -- "$TRAINING.bak" "$TESTING.bak"
diff --git a/dos2unix.py b/dos2unix.py
new file mode 100644
index 0000000..ba47d96
--- /dev/null
+++ b/dos2unix.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+"""\
+convert dos linefeeds (crlf) to unix (lf)
+usage: dos2unix.py