This post shows how to clean up HDFS directories older than a certain age (30 days) using a shell script.
#!/bin/sh
# Remove HDFS directories, directly under a target path, whose modification
# date (as reported by `hdfs dfs -ls`) is more than N days in the past.
#
# Usage: script [TARGET_PATH] [MAX_AGE_DAYS]
#   TARGET_PATH   HDFS path to scan     (default: /file/Path/)
#   MAX_AGE_DAYS  age threshold in days (default: 30)
#
# Requires GNU date (`date -d`) to convert the listing date to epoch seconds.

#######################################
# Delete sub-directories of an HDFS path that are older than a threshold.
# Arguments: $1 - HDFS path to list
#            $2 - maximum age in whole days; strictly older entries are removed
# Outputs:   warnings for unusable listing lines on stderr
# Returns:   2 on invalid arguments, otherwise the status of the scan loop
#######################################
purge_old_hdfs_dirs() {
  # POSIX sh has no `local`; these names are only used inside this function.
  target_path=$1
  max_age_days=$2

  if [ -z "$target_path" ]; then
    printf 'purge_old_hdfs_dirs: target path must not be empty\n' >&2
    return 2
  fi
  case $max_age_days in
    '' | *[!0-9]*)
      printf 'purge_old_hdfs_dirs: max age must be a non-negative integer: %s\n' \
        "$max_age_days" >&2
      return 2
      ;;
  esac

  now=$(date +%s)

  # Listing columns: perms, replicas, owner, group, size, date, time, path.
  # `grep "^d"` keeps directory entries only. The last `read` variable takes
  # the remainder of the line, so paths containing spaces stay intact.
  hdfs dfs -ls "$target_path" | grep "^d" |
    while read -r _perms _repl _owner _group _size mod_date _mod_time dir_path; do
      # Never issue a recursive delete with an empty path.
      [ -n "$dir_path" ] || continue

      # Skip lines whose date cannot be parsed instead of breaking the
      # arithmetic below.
      if ! dir_epoch=$(date -d "$mod_date" +%s 2>/dev/null); then
        printf 'skipping %s: cannot parse date "%s"\n' "$dir_path" "$mod_date" >&2
        continue
      fi

      age_days=$(((now - dir_epoch) / (24 * 60 * 60)))
      if [ "$age_days" -gt "$max_age_days" ]; then
        # stdin is detached so the command cannot consume the listing that
        # the loop is still reading.
        hdfs dfs -rm -r "$dir_path" </dev/null
      fi
    done
}

purge_old_hdfs_dirs "${1:-/file/Path/}" "${2:-30}"
If you run into any problems deleting files, please leave a comment here.