Hadoop - CLI
Note: the -R flag means "recursively".
Get Classpath
$ hadoop classpath
Check Queues
$ hadoop queue -list
Check logs
$ yarn logs -applicationId <applicationId>
where <applicationId> can be found in the output printed to the terminal when the application was submitted.
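e.g. (the application ID below is made up)
$ yarn logs -applicationId application_1428487296152_25597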
Count and check quota
$ hadoop fs -count /my/folder
Check quota
$ hadoop fs -count -q -h /my/folder
Find user quota
$ hadoop fs -count -q /user/$USER
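The -q output columns are, in order (a sketch; exact widths vary by Hadoop version):
QUOTA  REM_QUOTA  SPACE_QUOTA  REM_SPACE_QUOTA  DIR_COUNT  FILE_COUNT  CONTENT_SIZE  PATHNAME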
Set different replication factor
a) set the replication factor to 1 (a single copy) for all files under <file path>
$ hadoop fs -setrep -R -w 1 <file path>
b) set the replication factor of <file> to 2
$ hadoop fs -setrep -w 2 <file>
List, Make, Remove
List files and directories; -lsr is the recursive version.
hadoop fs -ls [PATH ...]
hadoop fs -lsr [PATH ...]
e.g.
hadoop fs -ls file:///
hadoop fs -ls hdfs:///
Show the content of files. Use -text for compressed files.
hadoop fs -cat FILE [FILE ...]
hadoop fs -text FILE [FILE ...]
Show the last 1 KB of FILE; -f keeps following as the file grows.
hadoop fs -tail [-f] FILE
Make Directory. Automatically create any missing directories.
hadoop fs -mkdir PATH [PATH ...]
Remove and its recursive version
hadoop fs -rm PATH [PATH ...]
hadoop fs -rmr PATH [PATH ...]
Create files of length 0. Fails if files already exist and have nonzero length.
hadoop fs -touchz FILE [FILE ...]
Test
- -e Returns 0 if PATH exists.
- -z Returns 0 if file length is 0.
- -d Returns 0 if PATH is a directory.
e.g.
hadoop fs -test -[ezd] PATH
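e.g. use the exit status in a shell conditional (the path is just an example):
hadoop fs -test -d /user/xxx/output || hadoop fs -mkdir /user/xxx/output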
Set Replication. -w will wait for the replication factor to match the target.
hadoop fs -setrep [-R] [-w] REP PATH [PATH ...]
Copy and Move
hadoop fs -cp SRC [SRC ...] DST
hadoop fs -mv SRC [SRC ...] DST
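e.g. (paths are examples):
hadoop fs -cp /user/xxx/test.txt /user/xxx/test-copy.txt
hadoop fs -mv /user/xxx/old.txt /user/xxx/renamed.txt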
From Local to HDFS
hadoop fs -copyFromLocal LOCALSRC [LOCALSRC ...] DST
hadoop fs -put LOCALSRC [LOCALSRC ...] DST
hadoop fs -moveFromLocal LOCALSRC [LOCALSRC ...] DST
From HDFS to Local
hadoop fs -copyToLocal [-ignorecrc] [-crc] SRC [SRC...] LOCALDST
hadoop fs -get [-ignorecrc] [-crc] SRC [SRC...] LOCALDST
hadoop fs -moveToLocal [-ignorecrc] [-crc] SRC [SRC...] LOCALDST
e.g.
hadoop fs -copyFromLocal test.txt hdfs://localhost:54310/user/xxx/test.txt
Use the default filesystem (defined by fs.default.name)
hadoop fs -copyFromLocal test.txt /user/xxx/test.txt
Use the default home directory (omit /user/xxx)
hadoop fs -copyFromLocal test.txt test.txt
e.g. copy test.txt on HDFS to test2.txt on the local filesystem
hadoop fs -copyToLocal test.txt test2.txt
Get Merge: get all the files in the directories that match the source file pattern and merge them into a single file on the local filesystem; <src> is kept.
hadoop fs -getmerge SRC LOCALDST
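e.g. merge all part files of a job output directory into one local file (paths are examples):
hadoop fs -getmerge /user/xxx/output merged.txt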
Statistics
Display statistics
- %b Size of file in bytes
- %F The string “directory” or “regular file” depending on file type
- %n Filename
- %o Block size
- %r Replication
- %y UTC date in yyyy-MM-dd HH:mm:ss format
- %Y Milliseconds since January 1, 1970 UTC
hadoop fs -stat [FORMAT] PATH [PATH ...]
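e.g. print name, block size, replication, and modification time (the path is just an example):
hadoop fs -stat "%n %o %r %y" /user/xxx/test.txt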
Show statistics: #subdirectories, #files, #bytes used. -q to display quota
hadoop fs -count [-q] PATH [PATH ...]
Disk Usage. Display sizes of files. List all files in directories.
hadoop fs -du [PATH ...]
Disk Usage. Sum up the sizes of files in the directory.
hadoop fs -dus [PATH ...]
Empty the trash bin.
hadoop fs -expunge
Permission
Change Group. Must be the owner or a superuser.
hadoop fs -chgrp [-R] GROUP PATH [PATH ...]
Change Permission.
hadoop fs -chmod [-R] MODE[, MODE ...] PATH [PATH ...]
Change Owner
hadoop fs -chown [-R] [OWNER][:[GROUP]] PATH [PATH ...]
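e.g. hand a directory over to a user and group (user, group, and path are just examples):
hadoop fs -chgrp -R hadoop /user/xxx/shared
hadoop fs -chmod -R 750 /user/xxx/shared
hadoop fs -chown -R xxx:hadoop /user/xxx/shared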
Others
Help
hadoop fs -help [CMD]
fsck
hadoop fsck /
dfsadmin
hadoop dfsadmin -report
hadoop dfsadmin -metasave filename
Job
hadoop job -list
Kill a Job
$ yarn application -kill <applicationId>
or
$ hadoop job -kill <jobid>
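e.g. list running applications to find the ID first, then kill it (the ID below is made up):
$ yarn application -list
$ yarn application -kill application_1428487296152_25597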
Setup New User
Make new user directory
$ hadoop fs -mkdir /user/username
Change the owner of the directory
$ hadoop fs -chown username:username /user/username
Set Quota
$ hadoop dfsadmin -setSpaceQuota 1t /user/username
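Verify the new directory and quota, reusing the count command from above:
$ hadoop fs -count -q -h /user/username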
Generic Options
Specify a configuration file.
-conf <configuration file>
Set value for a JobConf property.
-D <property=value>
Specify a NameNode, can be “local”.
-fs <local|namenode:port>
Specify a JobTracker.
-jt <local|jobtracker:port>
Specify a comma-separated list of files to be used with the MapReduce job. These files are automatically distributed to all task nodes to be locally available.
-files <list of files>
Specify a comma-separated list of jar files to be included in the classpath of all task JVMs.
-libjars <list of jars>
Specify a comma-separated list of archives to be unarchived on all task nodes.
-archives <list of archives>
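A sketch combining several generic options in one job submission (the jar, class, property, and file names are all hypothetical, and this assumes the job's main class runs through ToolRunner so the generic options are parsed):
hadoop jar myjob.jar com.example.MyJob \
    -D mapreduce.job.reduces=4 \
    -files lookup.txt \
    -libjars extra-lib.jar \
    input output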
Auto-completion
Add to ~/.bashrc or ~/.bash_profile:
## Autocompletion for HDFS
# hdfs(1) completion
have()
{
    unset -v have
    # look for the command on an extended PATH; set have="yes" if found
    PATH=$PATH:/sbin:/usr/sbin:/usr/local/sbin type $1 &>/dev/null &&
        have="yes"
}

have hadoop &&
_hdfs()
{
    local cur prev
    COMPREPLY=()
    cur=${COMP_WORDS[COMP_CWORD]}
    prev=${COMP_WORDS[COMP_CWORD-1]}

    # complete the subcommand right after "hdfs"
    if [[ "$prev" == hdfs ]]; then
        COMPREPLY=( $( compgen -W '-ls -lsr -du -dus -count -mv -cp -rm \
            -rmr -expunge -put -copyFromLocal -moveToLocal -mkdir -setrep \
            -touchz -test -stat -tail -chmod -chown -chgrp -help' -- $cur ) )
    fi

    # complete HDFS paths for subcommands that take a path argument
    if [[ "$prev" == -ls ]] || [[ "$prev" == -lsr ]] || \
       [[ "$prev" == -du ]] || [[ "$prev" == -dus ]] || \
       [[ "$prev" == -cat ]] || [[ "$prev" == -mkdir ]] || \
       [[ "$prev" == -put ]] || [[ "$prev" == -rm ]] || \
       [[ "$prev" == -rmr ]] || [[ "$prev" == -tail ]] || \
       [[ "$prev" == -cp ]]; then
        if [[ -z "$cur" ]]; then
            COMPREPLY=( $( compgen -W "$( hdfs -ls / 2>/dev/null | grep -v ^Found | awk '{print $8}' )" -- "$cur" ) )
        elif [[ `echo $cur | grep \/$` ]]; then
            COMPREPLY=( $( compgen -W "$( hdfs -ls $cur 2>/dev/null | grep -v ^Found | awk '{print $8}' )" -- "$cur" ) )
        else
            COMPREPLY=( $( compgen -W "$( hdfs -ls $cur* 2>/dev/null | grep -v ^Found | awk '{print $8}' )" -- "$cur" ) )
        fi
    fi
} &&
complete -F _hdfs hdfs
unset have
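After adding the snippet, reload the shell (e.g. source ~/.bashrc); typing hdfs followed by a space and Tab should then offer the subcommands listed above. Note that the path completion calls hdfs -ls directly, so it assumes hdfs (or an alias for hadoop fs) accepts those shell commands without a dfs subcommand.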