diff --git a/.gitignore b/.gitignore
index bdf74e8..4a61924 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,7 +130,6 @@ dmypy.json
 
 # temporary files
 data_prep/data
-*.csv
 *.sav
 data_prep/src/cc/log.txt
 data_prep/src/cc/inference_output.json
diff --git a/data_prep/cc/cc_net/Makefile b/data_prep/cc/cc_net/Makefile
index e887ac4..bf6d3c2 100644
--- a/data_prep/cc/cc_net/Makefile
+++ b/data_prep/cc/cc_net/Makefile
@@ -1,6 +1,7 @@
 # Makefile to install CC-Net and train the LMs.
 # `make` or `make help` to get some help.
 
+
 # Arguments:
 lang?=en
 process?=8
@@ -58,6 +59,7 @@ dl_lm:
 lm: data/lm_sp/$(lang).sp.model data/lm_sp/$(lang).arpa.bin
 	# Computes a 5-gram LM for the given language -> make lang=it lm
 	# Restricted to the first NDOC_FOR_LM documents
+	mkdir -p data/lm_sp
 
 sp: data/lm_sp/$(lang).sp.model
 	# Train a sentence piece model on Wikipedia -> make lang=it sp
@@ -111,20 +113,21 @@ data/lm_sp/%.sp.model: data/cirrus/txt/%.opening.txt
 	echo "Trained SentencePiece model with `wc -l $(basename $@).vocab` pieces"
 
 data/cirrus/sp/%.opening.txt: data/cirrus/gz/%.json.gz data/lm_sp/%.sp.model
+	mkdir -p data/cirrus/sp
 	$(SPM_ENCODE) \
 		--model=$(word 2,$^) \
 		--output_format=piece \
-		< <(python get_wiki_cirrus.py opening --file $< --n_docs $(NDOC_FOR_LM)) \
+		< <(python cc_net/get_wiki_cirrus.py opening --file $< --n_docs $(NDOC_FOR_LM)) \
 		> $@
 
 data/cirrus/txt/%.opening.txt: data/cirrus/gz/%.json.gz
-	python get_wiki_cirrus.py opening \
+	python cc_net/get_wiki_cirrus.py opening \
 		--n_docs $(NDOC_FOR_LM) \
 		--file $< --output $@
 
 data/cirrus/gz/%.json.gz:
-	mkdir $(@D)
-	python get_wiki_cirrus.py dl --lang $(call get_lang,$(@F)) --output_dir $(@D)
+	mkdir -p $(@D)
+	python cc_net/get_wiki_cirrus.py dl --lang $(call get_lang,$(@F)) --output_dir $(@D)
 
 clean:
 	# Remove intemediary files, dataset, third_party sources
@@ -155,11 +158,8 @@ bin/lmplz: third_party/kenlm
 third_party/sentencepiece:
 	# Download sentencepiece sources: https://github.com/google/sentencepiece
 	mkdir -p $(@D)
-	wget -c -O $(@D)/sentencepiece.zip https://github.com/google/sentencepiece/archive/v0.1.83.zip
-	unzip -o -d $(@D) $(@D)/sentencepiece.zip
-	rm $(@D)/sentencepiece.zip
-	# remove the version id from the folder name
-	mv $(@D)/sentencepiece-* $@
+	git clone https://github.com/google/sentencepiece.git $(@D)/sentencepiece
+
 
 bin/spm_train: third_party/sentencepiece
 	# Compiles sentencepiece binaries
@@ -172,7 +172,10 @@ bin/spm_train: third_party/sentencepiece
 	# $ cd $
diff --git a/data_prep/cc/cc_net/cc_net/get_wiki_cirrus.py b/data_prep/cc/cc_net/cc_net/get_wiki_cirrus.py
--- a/data_prep/cc/cc_net/cc_net/get_wiki_cirrus.py
+++ b/data_prep/cc/cc_net/cc_net/get_wiki_cirrus.py
@@ ... @@ Dict[str, str]:
 def wget(url: str, output: Path):
-    subprocess.run(["wget", url, "-O", tmp(output), "-q"], check=True)
-    tmp(output).replace(output)
+    if not os.path.isfile(output):
+        subprocess.run(["wget", url, "-O", tmp(output), "-q"], check=True)
+        tmp(output).replace(output)
+    else:
+        print(f"File {tmp(output)} already exists, skipping download")
+
     assert (
         output.stat().st_size > 10_000
     ), f"File {output} downloaded from {url} looks too small"
diff --git a/data_prep/cc/cc_net/cc_net/mine.py b/data_prep/cc/cc_net/cc_net/mine.py
index 907c588..02d40cf 100644
--- a/data_prep/cc/cc_net/cc_net/mine.py
+++ b/data_prep/cc/cc_net/cc_net/mine.py
@@ -43,7 +43,7 @@
     "drop",
     "split_by_lang",
 ]
-
+import logging
 
 class Config(NamedTuple):
     """
@@ -120,7 +120,7 @@ def get_cc_shard(self, shard: int) -> process_wet_file.CCShardReader:
             self.cache_dir.mkdir(exist_ok=True)
             dump_cache = self.cache_dir / self.dump
             dump_cache.mkdir(exist_ok=True)
-
+
         return process_wet_file.CCShardReader(
             self.dump,
             shard=shard,
@@ -265,8 +265,9 @@ def hashes(conf: Config) -> List[Path]:
     ex(_hashes_shard, repeat(conf), *_transpose(missing_outputs))
 
     # Wait a bit so that files appears on the disk.
+    logging.info("Waiting for hashes to be written on the disk...")
     time.sleep(20)
-    assert all(o.exists() for o in outputs)
+    assert all(o.exists() for o in outputs), f"Missing outputs: {outputs}"
 
     return outputs
 
diff --git a/data_prep/cc/cc_net/config/test_segment.json b/data_prep/cc/cc_net/config/test_segment.json
index 79b2613..9b3b9b0 100644
--- a/data_prep/cc/cc_net/config/test_segment.json
+++ b/data_prep/cc/cc_net/config/test_segment.json
@@ -1,10 +1,10 @@
 {
   "hash_in_mem": 2,
   "dump": "2019-09",
-  "num_shards": 4,
+  "num_shards": 1,
   "num_segments_per_shard": 1,
   "mine_num_processes": 0,
-  "lang_whitelist": ["de", "it", "fr"],
+  "lang_whitelist": ["af"],
   "pipeline": [
     "dedup",
     "lid",
@@ -19,5 +19,5 @@
   "output_dir": "test_data2",
   "mined_dir": "mined_by_segment",
   "target_size": "32M",
-  "cache_dir": "test_data/wet_cache"
+  "cache_dir": "test_data2/wet_cache"
 }
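
For context on the `wget()` change in `cc_net/get_wiki_cirrus.py` above: the patch makes repeated pipeline runs skip Wikipedia dumps that were already fetched, while still running the size sanity check. The sketch below illustrates that skip-if-present pattern in isolation; the helper name `download_once` is hypothetical and this is not cc_net's actual implementation.

```python
# Minimal sketch (assumed names, not cc_net's API) of the skip-if-present
# download guard introduced in the get_wiki_cirrus.py hunk above.
import subprocess
from pathlib import Path


def tmp(output: Path) -> Path:
    # Same convention as cc_net's wget(): fetch into a sibling "tmp." file,
    # then rename, so a partially downloaded file never shadows the target.
    return output.parent / ("tmp." + output.name)


def download_once(url: str, output: Path) -> None:
    if output.is_file():
        # A previous run already produced the final file: do nothing.
        print(f"File {output} already exists, skipping download")
    else:
        subprocess.run(["wget", url, "-O", str(tmp(output)), "-q"], check=True)
        tmp(output).replace(output)
    # Run the sanity check in both branches; a tiny file usually means a
    # truncated or failed download.
    assert output.stat().st_size > 10_000, f"File {output} looks too small"
```

One detail worth noting: the guard tests the final output path rather than the `tmp.` file, so an interrupted download (which leaves only the temporary file behind) is retried on the next run.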