Skip to content

Commit

Permalink
Split lines in Unix cheat sheet
Browse files Browse the repository at this point in the history
  • Loading branch information
bewt85 committed Dec 21, 2015
1 parent 4ef59f5 commit f495f14
Showing 1 changed file with 19 additions and 10 deletions.
29 changes: 19 additions & 10 deletions Notebooks/Unix/cheat_sheet/unix_cheat_sheet.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"ls # What else is in this directory\n",
"ls .. # What is in the directory above me\n",
"ls foo/bar/ # What is inside the bar directory which is inside the foo/ directory\n",
"ls -lah foo/ # Give the the details (-l) of all files and folders (-a) using human readable file sizes (-h)\n",
"ls -lah foo/ # Give the the details (-l) of all files and folders (-a) using human\n",
" # readable file sizes (-h)\n",
"cd ../.. # Move up two directories\n",
"cd ../foo/bar # Move up one directory and down into the foo/bar/ subdirectories\n",
"cp -r foo/ baz/ # Copy the foo/ directory into the baz/ directory\n",
Expand All @@ -26,7 +27,8 @@
"\n",
"```\n",
"less bar.bed # scroll through bar.bed\n",
"grep chrom bar.bed | less -S # Only look at lines in bar.bed which have 'chrom' and don't wrap lines (`-S`)\n",
"grep chrom bar.bed | less -S # Only look at lines in bar.bed which have 'chrom' and\n",
" # don't wrap lines (-S)\n",
"head -20 bar.bed # show me the first 20 lines of bar.bed\n",
"tail -20 bar.bed # show me the last 20 lines\n",
"cat bar.bed # show me all of the lines (bad for big files)\n",
Expand All @@ -43,7 +45,8 @@
"grep -r foo baz/ # show me all examples of foo in baz/ and every subdirectory within it\n",
"grep '^foo' bar.bed # show me all of the lines begining with foo\n",
"grep 'foo$' bar.bed # show me all of the lines ending in foo\n",
"grep -i '^[acgt]$' bar.bed # show me all of the lines which only have the characters a,c,g and t (ignoring their case)\n",
"grep -i '^[acgt]$' bar.bed # show me all of the lines which only have the characters\n",
" # a,c,g and t (ignoring their case)\n",
"grep -v foo bar.bed # don't show me any files with foo in them\n",
"```\n",
"\n",
Expand All @@ -56,7 +59,9 @@
"awk -F\"\\t\" '{print $NF}' bar.bed # ignore spaces and print the last column\n",
"awk -F\"\\t\" '{print $(NF-1)}' bar.bed # print the penultimate column\n",
"awk '{sum+=$2} END {print sum}' bar.bed # print the sum of the second column\n",
"awk '/^foo/ {sum+=$2; count+=1} END {print sum/count}' bar.bed # print the average of the second value of lines starting with foo\n",
"awk '/^foo/ {sum+=$2; count+=1} END {print sum/count}' bar.bed # print the average of the\n",
" # second value of lines starting\n",
" # with foo\n",
"```\n",
"\n",
"## Piping, redirection and more advanced queries\n",
Expand All @@ -74,7 +79,7 @@
"# awk => NR: is the row number\n",
"# NR%10: is the modulo (remander) of dividing my 10\n",
"# awk is therefore giving you every 10th line\n",
"# head => only show the first 20 \n",
"# head => only show the first 20\n",
"\n",
"awk '{l=($3-$2+1)}; (l<300 && $2>200000 && $3<250000)' exercises.bed\n",
"# Gives:\n",
Expand Down Expand Up @@ -122,7 +127,8 @@
" exit 2\n",
"fi\n",
"\n",
"# Get the lines which aren't headers, take the first column and return the unique values\n",
"# Get the lines which aren't headers,\n",
"# take the first column and return the unique values\n",
"number_of_contigs_in_one=$(awk '$1 !~ /^#/ {print $1}' $file_one | sort -u | wc -l)\n",
"number_of_contigs_in_two=$(awk '/^[^#]/ {print $1}' $file_two | sort -u | wc -l)\n",
"\n",
Expand Down Expand Up @@ -151,7 +157,7 @@
"* `man the_name_of_a_command` often gives you help\n",
"* Google is normally better at giving examples (prioritise stackoverflow.com results, they're normally good)\n",
"\n",
"#### Build commands slowly\n",
"## Build commands slowly\n",
"\n",
"If you wanted me to calculate the sum of all of the scores for genes on contig-1 in a bed file, I'd probably run each of the following commands before moving onto the next:\n",
"\n",
Expand All @@ -161,9 +167,12 @@
"awk '{print $1}' bar.bed | sort -u | less # check the contigs don't look wierd\n",
"awk '{print $4}' bar.bed | sort -u | less # check the genes don't look wierd\n",
"awk '$4 ~ /gene-/' bar.bed | head -20 # check that I can spot genes\n",
"awk '($1 == \"contig-1\" && $4 ~ /gene-/)' bar.bed | head -20 # check I can find genes on contig-1\n",
"head -20 bar.bed | awk '($1 == \"contig-1\" && $4 ~ /gene-/) {sum+=$5}; END {print sum}' # check my maths\n",
"awk '($1 == \"contig-1\" && $4 ~ /gene-/) {sum+=$5}; END {print sum}' bar.bed # get the answer you actually want\n",
"awk '($1 == \"contig-1\" && $4 ~ /gene-/)' bar.bed | head -20 # check I can find\n",
" # genes on contig-1\n",
"# check my algorithm works on a subset of the data\n",
"head -20 bar.bed | awk '($1 == \"contig-1\" && $4 ~ /gene-/) {sum+=$5}; END {print sum}'\n",
"# apply the algorithm to all of the data\n",
"awk '($1 == \"contig-1\" && $4 ~ /gene-/) {sum+=$5}; END {print sum}' bar.bed\n",
"```\n",
"\n",
"## Which tool should I use?\n",
Expand Down

0 comments on commit f495f14

Please sign in to comment.