binpash · angelhof · Aug 21, 2023 · Aug 31, 2023 · Aug 31, 2023 · Sep 7, 2023
diff --git a/evaluation/benchmarks/dgsh/sequential/1.sh b/evaluation/benchmarks/dgsh/sequential/1.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Report the type of the data on stdin and its size before and after
+# compression with xz, bzip2 and gzip.
+
+## Spool stdin to a temporary file (removed on exit) so it can be re-read
+file1=$(mktemp) || exit 1
+trap 'rm -f -- "$file1"' EXIT
+cat >"$file1"
+
+printf 'File type:\t'
+file - <"$file1"
+printf 'Original size:\t'
+wc -c <"$file1"
+
+# Size in bytes of each compressor's output
+printf 'xz:\t\t'
+xz -c <"$file1" | wc -c
+printf 'bzip2:\t\t'
+bzip2 -c <"$file1" | wc -c
+printf 'gzip:\t\t'
+gzip -c <"$file1" | wc -c
diff --git a/evaluation/benchmarks/dgsh/sequential/11.sh b/evaluation/benchmarks/dgsh/sequential/11.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+## Initialize the necessary temporary files (removed again on exit)
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+file5=$(mktemp)
+file6=$(mktemp)
+file7=$(mktemp)
+trap 'rm -f -- "$file1" "$file2" "$file3" "$file4" "$file5" "$file6" "$file7"' EXIT
+export LC_ALL=C
+
+# Commit history in the form of ascending Unix timestamps, emails
+git log --pretty=tformat:'%at %ae' |
+awk -v now="$(date +%s)" 'NF == 2 && $1 > 100000 && $1 < now' |
+sort -n > "$file1"
+
+# Calculate number of committers
+awk '{print $2}' "$file1" |
+sort -u |
+wc -l > "$file2"
+cp "$file2" "$file3"
+cp "$file2" "$file4"
+
+# Calculate last commit timestamp in seconds
+tail -n 1 "$file1" |
+awk '{print $1}' > "$file5"
+
+# Calculate first commit timestamp in seconds
+head -n 1 "$file1" |
+awk '{print $1}' >> "$file5"
+
+# Compute the difference between last and first commit in days; the result is
+# captured first because "> $file5" would truncate the file while it is read
+days=$(tr '\n' ' ' < "$file5" | awk '{print int(($1 - $2) / 60 / 60 / 24)}')
+printf '%s\n' "$days" > "$file5"
+
+sort -k2 "$file1" > "$file6"
+
+# Place committers left/right of the median according to the number of their commits
+awk '{print $2}' "$file1" |
+sort | uniq -c | sort -n |
+awk -v committers1="$file2" '
+BEGIN {
+    while ((getline NCOMMITTERS < committers1) > 0) {}
+    l = 0; r = NCOMMITTERS;
+}
+{print NR % 2 ? l++ : --r, $2}' |
+sort -k2 > "$file7"
+
+# Join committer positions with commit timestamps based on committer email, via
+# a scratch file: "> $file6" would truncate the file while join still reads it
+joined=$(mktemp)
+join -j 2 "$file6" "$file7" | sort -k 2n > "$joined"
+mv -- "$joined" "$file6"
+
+# Create portable bitmap; the eroded image is kept in a scratch file so that
+# each of the two conversions at the end can read it in full
+bitmap=$(mktemp)
+{
+    echo 'P1'
+    # Dimensions: number of committers and time span in days
+    cat "$file3" "$file5" |
+    tr '\n' ' ' |
+    awk '{print $1, $2}'
+
+    # One row per day with a 1 in the column of every committer active that day
+    perl -na -e '
+    BEGIN {
+        open(my $ncf, "<", "'"$file4"'");
+        $ncommitters = <$ncf>;
+        @empty[$ncommitters - 1] = 0; @committers = @empty;
+    }
+    sub out {
+        print join("", map($_ ? "1" : "0", @committers)), "\n";
+    }
+
+    $day = int($F[1] / 60 / 60 / 24);
+    $pday = $day if (!defined($pday));
+
+    while ($day != $pday) {
+        out();
+        @committers = @empty;
+        $pday++;
+    }
+
+    $committers[$F[2]] = 1;
+
+    END { out(); }
+    ' "$file6"
+} |
+pgmmorphconv -erode <(
+cat <<EOF
+P1
+7 7
+1 1 1 0 1 1 1
+1 1 0 0 0 1 1
+1 0 0 0 0 0 1
+0 0 0 0 0 0 0
+1 0 0 0 0 0 1
+1 1 0 0 0 1 1
+1 1 1 0 1 1 1
+EOF
+) > "$bitmap"
+
+# Full-scale image
+pnmtopng < "$bitmap" >large.png
+# A smaller image
+pamscale -width 640 < "$bitmap" |
+pnmtopng >small.png
+rm -f -- "$bitmap"
diff --git a/evaluation/benchmarks/dgsh/sequential/17.sh b/evaluation/benchmarks/dgsh/sequential/17.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Initialize the necessary temporary files (removed again on exit)
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+trap 'rm -f -- "$file1" "$file2" "$file3" "$file4"' EXIT
+
+# Save the ls output to a temporary file
+ls -n > "$file1"
+
+# Reorder fields in DIR-like way
+awk '!/^total/ {print $6, $7, $8, $1, sprintf("%8d", $5), $9}' "$file1" > "$file2"
+
+# Count number of files (wc reads stdin so that no file name is printed)
+wc -l < "$file1" | tr -d \\n > "$file3"
+printf ' File(s) ' >> "$file3"
+awk '{s += $5} END {printf("%d bytes\n", s)}' "$file1" >> "$file3"
+
+# Count number of directories and print label for number of dirs and calculate free bytes
+grep -c '^d' "$file1" | tr -d \\n > "$file4"
+df -h . | awk '!/Use%/{print " Dir(s) " $4 " bytes free"}' >> "$file4"
+
+# Display the results
+cat "$file2" "$file3" "$file4"
diff --git a/evaluation/benchmarks/dgsh/sequential/18.sh b/evaluation/benchmarks/dgsh/sequential/18.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Initialize the necessary temporary files (removed again on exit)
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+trap 'rm -f -- "$file1" "$file2" "$file3"' EXIT
+
+# Save the input to a temporary file: $INPUT_FILE if set, standard input otherwise
+cat -- "${INPUT_FILE:--}" > "$file1"
+
+# Process the input in two different ways
+cut -d , -f 5-6 "$file1" > "$file2"
+cut -d , -f 2-4 "$file1" > "$file3"
+
+# Merge the processed results
+paste -d , "$file2" "$file3"
diff --git a/evaluation/benchmarks/dgsh/sequential/2.sh b/evaluation/benchmarks/dgsh/sequential/2.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+## Note: Needs to be run on a big git repository to make sense (maybe linux)
+
+## Initialize the necessary temporary files (removed again on exit)
+file1=$(mktemp)
+trap 'rm -f -- "$file1"' EXIT
+
+# Rank the lines read from standard input by frequency.
+# Prints every distinct line prefixed with its number of occurrences,
+# most frequent first.
+forder() {
+	sort | uniq -c | sort -rn
+}
+
+
+# One "author<TAB>date" line per commit; tab-separated as names may contain ":"
+git log --format='%an%x09%ad' --date=default "$@" >"$file1"
+
+echo "Authors ordered by number of commits"
+awk -F'\t' '{print $1}' <"$file1" | forder
+
+echo "Days ordered by number of commits"
+# Order the three-letter day names (start of the default date) by frequency
+awk -F'\t' '{print substr($2, 1, 3)}' <"$file1" | forder
diff --git a/evaluation/benchmarks/dgsh/sequential/3.sh b/evaluation/benchmarks/dgsh/sequential/3.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+## Note: Needs to be run on a big git repository to make sense (maybe linux)
+
+## Initialize the necessary temporary files (removed again on exit)
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+trap 'rm -f -- "$file1" "$file2" "$file3" "$file4"' EXIT
+
+find "$@" \( -name \*.c -or -name \*.h \) -type f -print0 >"$file1"
+
+# Average length of the file names, ignoring the directory part
+printf 'FNAMELEN: '
+tr \\0 \\n <"$file1" |
+sed 's|^.*/||' |
+awk '{s += length($0); n++} END {
+    if (n>0)
+        print s / n;
+    else
+        print 0; }'
+
+# Contents of all the files, concatenated
+xargs -0 /bin/cat <"$file1" >"$file2"
+
+# Code without comments and strings (# is rewritten to @ before cpp runs)
+sed 's/#/@/g;s/\\[\\"'\'']/@/g;s/"[^"]*"/""/g;'"s/'[^']*'/''/g" <"$file2" |
+    cpp -P >"$file3"
+
+# Structure definitions
+printf 'NSTRUCT: '
+grep -E -c 'struct[   ]*{|struct[   ]*[a-zA-Z_][a-zA-Z0-9_]*[       ]*{' <"$file3"
+#}} (match preceding openings)
+
+# Type definitions
+printf 'NTYPEDEF: '
+grep -cw typedef <"$file3"
+
+# Use of void
+printf 'NVOID: '
+grep -cw void <"$file3"
+
+# Use of gets
+printf 'NGETS: '
+grep -cw gets <"$file3"
+
+# Average identifier length
+printf 'IDLEN: '
+tr -cs 'A-Za-z0-9_' '\n' <"$file3" |
+sort -u |
+awk '/^[A-Za-z]/ { len += length($1); n++ } END {
+    if (n>0)
+        print len / n;
+    else
+        print 0; }'
+
+# Lines and characters, printed as lines:chars
+printf 'CHLINESCHAR: '
+wc -lc <"$file2" |
+    awk '{OFS=":"; print $1, $2}'
+
+# Characters left once cpp has stripped the comments, in thousands
+printf 'NCCHAR: '
+sed 's/#/@/g' <"$file2" |
+cpp -traditional -P |
+wc -c |
+awk '{OFMT = "%.0f"; print $1/1000}'
+
+# Number of comments
+printf 'NCOMMENT: '
+grep -E -c '/\*|//' <"$file2"
+
+# Occurrences of the word Copyright
+printf 'NCOPYRIGHT: '
+grep -ci copyright <"$file2"
+
+# NUL-separated list of the C source files below the given paths
+find "$@" -name \*.c -type f -print0 >"$file2"
+
+# The same list, one name per line, for counting
+tr \\0 \\n <"$file2" >"$file3"
+
+# NCFILE: how many C files there are
+printf 'NCFILE: '
+wc -l <"$file3"
+
+# NCDIR: how many distinct names the directories holding C files have
+# (each path is reduced to the last component of its directory)
+printf 'NCDIR: '
+sed 's,/[^/]*$,,;s,^.*/,,' <"$file3" |
+    sort -u | wc -l
+
+# Contents of all the C files, concatenated
+xargs -0 /bin/cat <"$file2" >"$file3"
+
+# CLINESCHAR: line and character counts of the C code, as lines:chars
+printf 'CLINESCHAR: '
+wc -lc <"$file3" | awk '{OFS=":"; print $1, $2}'
+
+# C code without comments and strings
+sed 's/#/@/g;s/\\[\\"'\'']/@/g;s/"[^"]*"/""/g;'"s/'[^']*'/''/g" <"$file3" |
+    cpp -P >"$file4"
+
+# NFUNCTION: lines that start with an opening brace
+printf 'NFUNCTION: '
+grep -c '^{' <"$file4"
+
+# NGOTO: lines that use the goto keyword
+printf 'NGOTO: '
+grep -cw goto <"$file4"
+
+# NREGISTER: lines that use the register keyword
+printf 'NREGISTER: '
+grep -cw register <"$file4"
+
+# NMACRO: function-like macro definitions (# was rewritten to @ above)
+printf 'NMACRO: '
+grep -c '@[   ]*define[   ][   ]*[a-zA-Z_][a-zA-Z0-9_]*(' <"$file4"
+
+# NINCLUDE: include directives
+printf 'NINCLUDE: '
+grep -c '@[   ]*include' <"$file4"
+
+# NCONST: numeric constants
+printf 'NCONST: '
+grep -ohw '[0-9][x0-9][0-9a-f]*' <"$file4" | wc -l
+
+
+# NHFILE: how many header files there are
+printf 'NHFILE: '
+find "$@" -name \*.h -type f |
+    wc -l
diff --git a/evaluation/benchmarks/dgsh/sequential/4.sh b/evaluation/benchmarks/dgsh/sequential/4.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+## Initialize the necessary temporary files (removed again on exit)
+file1=$(mktemp)
+file2=$(mktemp)
+trap 'rm -f -- "$file1" "$file2"' EXIT
+
+# Create list of files
+find "$@" -type f |
+
+# Produce lines of the form
+# MD5(filename)= 811bfd4b5974f39e986ddc037e1899e7
+xargs openssl md5 |
+
+# Convert each line into a "filename md5sum" pair
+sed 's/^MD5(//;s/)= / /' |
+
+# Sort by MD5 sum
+sort -k2 > "$file1"
+
+# Print an MD5 sum for each file that appears more than once
+awk '{print $2}' < "$file1" | uniq -d > "$file2"
+
+
+# Join the repeated MD5 sums with the corresponding file names:
+# field 1 of the list of repeated sums is matched against field 2 of the
+# sorted "filename md5sum" pairs, giving "md5sum filename" lines
+join -2 2 "$file2" "$file1" |
+
+# Output same files on a single line
+awk '
+BEGIN {ORS=""}
+$1 != prev && prev {print "\n"}
+END {if (prev) print "\n"}
+{if (prev) print " "; prev = $1; print $2}'