Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New rewrites of dgsh scripts, cleaner and more meaningful #688

Open
wants to merge 13 commits into
base: future
Choose a base branch
from
21 changes: 21 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash

## Create the scratch file that holds a copy of standard input so the
## measurements below can each re-read it; abort if it cannot be created.
file1=$(mktemp) || exit 1
## Remove the scratch file on every exit path instead of leaving it behind.
trap 'rm -f -- "$file1"' EXIT

# Buffer all of standard input so every tool below sees the same bytes.
cat >"$file1"

# Type of the data as detected by file(1).
printf 'File type:\t'
file - <"$file1"

# Uncompressed size in bytes.
printf 'Original size:\t'
wc -c <"$file1"

# Compressed size in bytes for each compressor, in the order xz, bzip2, gzip.
# Every label is the tool name followed by a colon and two tabs.
for compressor in xz bzip2 gzip; do
  printf '%s:\t\t' "$compressor"
  "$compressor" -c <"$file1" | wc -c
done
112 changes: 112 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/11.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/bin/bash

## Create a private scratch directory for the intermediate files; abort if it
## cannot be created.
scratch_dir=$(mktemp -d) || exit 1
## Remove the directory and everything in it on every exit path.
trap 'rm -rf -- "$scratch_dir"' EXIT

## Intermediate files, as used by the rest of this script:
##   file1        "timestamp email" per commit, ascending by timestamp
##   file2..file4 number of distinct committers (three identical copies)
##   file5        last/first commit timestamps, later the span in days
##   file6        commits ordered by email, later joined with positions
##   file7        "position email" per committer
file1=$scratch_dir/file1
file2=$scratch_dir/file2
file3=$scratch_dir/file3
file4=$scratch_dir/file4
file5=$scratch_dir/file5
file6=$scratch_dir/file6
file7=$scratch_dir/file7

# Run every tool below in the C locale.
export LC_ALL=C

# Commit history in the form of ascending Unix timestamps, emails.
# Records that do not have exactly two fields, or whose timestamp is not
# strictly between 100000 and the current time, are dropped. The current time
# is handed to awk as a variable instead of being spliced into the program.
git log --pretty=tformat:'%at %ae' |
  awk -v now="$(date +%s)" 'NF == 2 && $1 > 100000 && $1 < now' |
  sort -n >"$file1"

# Calculate number of committers. Three identical copies are kept because
# three later stages each read their own file.
awk '{print $2}' "$file1" |
  sort -u |
  wc -l >"$file2"
cp "$file2" "$file3"
cp "$file2" "$file4"

# Calculate last commit timestamp in seconds (first line of "$file5")
tail -n 1 "$file1" |
  awk '{print $1}' >"$file5"

# Calculate first commit timestamp in seconds (second line of "$file5")
head -n 1 "$file1" |
  awk '{print $1}' >>"$file5"

# Gather last and first commit timestamp and compute the difference in days.
# The result replaces the contents of "$file5". It is computed into a variable
# first: redirecting a pipeline's output to the very file the pipeline reads
# lets the shell truncate that file before it has been read.
span_days=$(
  tr '\n' ' ' <"$file5" |
    awk '{print int(($1 - $2) / 60 / 60 / 24)}'
)
printf '%s\n' "$span_days" >"$file5"

# Commits ordered by committer email (second field).
sort -k2 "$file1" >"$file6"

# Place committers left/right of the median according to the number of their
# commits. The input to the last awk is "count email", ascending by count;
# the output is "position email", ordered by email.
awk '{print $2}' "$file1" |
  sort |
  uniq -c |
  sort -n |
  awk -v count_file="$file2" '
    BEGIN {
      # Keep the last line of the count file as the number of committers.
      while ((getline total < count_file) > 0) {}
      left = 0
      right = total
    }
    {
      # Odd-numbered records take the next position counting up from 0,
      # even-numbered records the next position counting down from the total.
      if (NR % 2) {
        slot = left++
      } else {
        slot = --right
      }
      print slot, $2
    }' |
  sort -k2 >"$file7"

# Join committer positions with commit timestamps based on committer email,
# then order the "email timestamp position" records by timestamp.
# The result replaces "$file6", which is also an input of join. sort -o is used
# instead of a shell redirection because the redirection would truncate the
# file before join has read it, whereas sort opens the -o file only after it
# has consumed all of its input.
join -j 2 "$file6" "$file7" |
  sort -k 2n -o "$file6"

# Create portable bitmap
# The eroded bitmap is needed twice (full-scale and scaled-down PNG), so it is
# stored in a scratch file. Piping it into a command group would let the first
# converter read the whole stream and leave nothing for the second one.
bitmap=$(mktemp) || exit 1

{
  echo 'P1'
  # Bitmap dimensions: number of committers, then the commit span in days.
  {
    cat "$file3"
    cat "$file5"
  } |
    tr '\n' ' ' |
    awk '{print $1, $2}'

  # One bitmap row per day. The input records are "email timestamp position";
  # a row has a 1 at the position of each committer with a commit that day.
  perl -na -e '
BEGIN {
  # Number of committers, read from the count file.
  open(my $ncf, "<", "'"$file4"'");
  $ncommitters = <$ncf>;
  @empty[$ncommitters - 1] = 0; @committers = @empty;
}
# Print the row for the current day.
sub out {
  print join("", map($_ ? "1" : "0", @committers)), "\n";
}

$day = int($F[1] / 60 / 60 / 24);
$pday = $day if (!defined($pday));

# Emit the finished row, plus an empty row for every day without commits.
while ($day != $pday) {
  out();
  @committers = @empty;
  $pday++;
}

$committers[$F[2]] = 1;

END { out(); }
' "$file6"
} |
  pgmmorphconv -erode <(
    cat <<EOF
P1
7 7
1 1 1 0 1 1 1
1 1 0 0 0 1 1
1 0 0 0 0 0 1
0 0 0 0 0 0 0
1 0 0 0 0 0 1
1 1 0 0 0 1 1
1 1 1 0 1 1 1
EOF
  ) >"$bitmap"

# Full-scale image
pnmtopng <"$bitmap" >large.png
# A smaller image
pamscale -width 640 <"$bitmap" |
  pnmtopng >small.png

rm -f -- "$bitmap"
25 changes: 25 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/17.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash

# Initialize the necessary temporary files inside a private scratch directory
# that is removed on every exit path.
scratch_dir=$(mktemp -d) || exit 1
trap 'rm -rf -- "$scratch_dir"' EXIT
file1=$scratch_dir/file1
file2=$scratch_dir/file2
file3=$scratch_dir/file3
file4=$scratch_dir/file4

# Save the ls output to a temporary file
ls -n >"$file1"

# Reorder fields in DIR-like way
awk '!/^total/ {print $6, $7, $8, $1, sprintf("%8d", $5), $9}' "$file1" >"$file2"

# Summary line: "<lines> File(s) <bytes> bytes".
{
  # Count the lines of the listing. wc reads standard input here so that it
  # prints only the number; given a file operand it also prints the file name,
  # which would end up in the summary.
  wc -l <"$file1" | tr -d '\n'
  printf ' File(s) '
  # Tally number of bytes (fifth field of the listing)
  awk '{s += $5} END {printf("%d bytes\n", s)}' "$file1"
} >"$file3"

# Count number of directories and print label for number of dirs and calculate free bytes
{
  grep -c '^d' "$file1" | tr -d '\n'
  df -h . | awk '!/Use%/{print " Dir(s) " $4 " bytes free"}'
} >"$file4"

# Display the results
cat "$file2" "$file3" "$file4"
16 changes: 16 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/18.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash

# Create a private scratch directory for the intermediate files; abort if it
# cannot be created, and remove it on every exit path.
scratch_dir=$(mktemp -d) || exit 1
trap 'rm -rf -- "$scratch_dir"' EXIT
# file1: copy of the input; file2, file3: the two column selections.
file1=$scratch_dir/file1
file2=$scratch_dir/file2
file3=$scratch_dir/file3

# Save the input to a temporary file: the file named by INPUT_FILE when that
# variable is set and non-empty, otherwise standard input. The name is quoted
# so a path containing whitespace or glob characters is passed to cat intact.
if [[ -n ${INPUT_FILE-} ]]; then
  cat -- "$INPUT_FILE" >"$file1"
else
  cat >"$file1"
fi

# Process the input in two different ways: comma-separated fields 5-6 and
# fields 2-4.
cut -d , -f 5-6 "$file1" >"$file2"
cut -d , -f 2-4 "$file1" >"$file3"

# Merge the processed results line by line, fields 5-6 first.
paste -d , "$file2" "$file3"
24 changes: 24 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash

## Note: Needs to be run on a big git repository to make sense (maybe linux)

## Create the scratch file that holds the git log output, which is read twice
## below; abort if it cannot be created.
file1=$(mktemp) || exit 1
## Remove the scratch file on every exit path instead of leaving it behind.
trap 'rm -f -- "$file1"' EXIT

# Order the lines on standard input by frequency: prints every distinct line
# prefixed by the number of times it occurs, highest count first.
forder() {
  sort | uniq -c | sort -n -r
}


# One "author:date" line per commit. Command-line arguments are passed on to
# git log.
git log --format="%an:%ad" --date=default "$@" >"$file1"

# Authors: everything before the first colon, ordered by frequency.
printf '%s\n' "Authors ordered by number of commits"
awk -F: '{print $1}' "$file1" | forder

# Days: the first three characters of the second colon-separated field
# (presumably the weekday abbreviation of the date), ordered by frequency.
printf '%s\n' "Days ordered by number of commits"
awk -F: '{print substr($2, 1, 3)}' "$file1" | forder
132 changes: 132 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/3.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#!/bin/bash

## Note: Needs to be run on a big git repository to make sense (maybe linux)

## Create a private scratch directory for the intermediate files; abort if it
## cannot be created, and remove it on every exit path.
scratch_dir=$(mktemp -d) || exit 1
trap 'rm -rf -- "$scratch_dir"' EXIT
## file1..file4 hold file lists, concatenated sources and preprocessed
## sources; several of them are reused for different data as the script
## proceeds.
file1=$scratch_dir/file1
file2=$scratch_dir/file2
file3=$scratch_dir/file3
file4=$scratch_dir/file4

# NUL-separated list of the C source and header files under the given paths.
find "$@" \( -name '*.c' -or -name '*.h' \) -type f -print0 >"$file1"

# Average length of the file names, with the directory part removed.
printf '%s' 'FNAMELEN: '
tr '\0' '\n' <"$file1" |
  sed 's|^.*/||' |
  awk '{ total += length($1); count++ }
    END { print (count > 0 ? total / count : 0) }'

# Concatenated contents of all listed files.
xargs -0 /bin/cat <"$file1" >"$file2"

# Replace every # and every escaped backslash or quote with @, empty out
# double- and single-quoted literals, then run the result through cpp -P.
sed -e 's/#/@/g' \
  -e 's/\\[\\"'\'']/@/g' \
  -e 's/"[^"]*"/""/g' \
  -e "s/'[^']*'/''/g" <"$file2" |
  cpp -P >"$file3"

# Metrics over all C and header files. "$file3" holds the preprocessed text,
# "$file2" the raw concatenated sources. grep -E replaces the obsolescent
# egrep, which newer GNU grep releases warn about on standard error.

# Structure definitions
printf '%s' 'NSTRUCT: '
grep -E -c 'struct[ ]*{|struct[ ]*[a-zA-Z_][a-zA-Z0-9_]*[ ]*{' <"$file3"
#}} (match preceding openings)

# Type definitions
printf '%s' 'NTYPEDEF: '
grep -cw typedef <"$file3"

# Use of void
printf '%s' 'NVOID: '
grep -cw void <"$file3"

# Use of gets
printf '%s' 'NGETS: '
grep -cw gets <"$file3"

# Average identifier length, over the distinct words that start with a letter
printf '%s' 'IDLEN: '
tr -cs 'A-Za-z0-9_' '\n' <"$file3" |
  sort -u |
  awk '/^[A-Za-z]/ { len += length($1); n++ }
    END { print (n > 0 ? len / n : 0) }'

# Lines and characters of the raw sources, as "lines:characters"
printf '%s' 'CHLINESCHAR: '
wc -lc <"$file2" |
  awk '{OFS=":"; print $1, $2}'

# Size of the sources after cpp -traditional, in thousands of characters
printf '%s' 'NCCHAR: '
sed 's/#/@/g' <"$file2" |
  cpp -traditional -P |
  wc -c |
  awk '{OFMT = "%.0f"; print $1/1000}'

# Number of lines that contain a comment opener
printf '%s' 'NCOMMENT: '
grep -E -c '/\*|//' <"$file2"

# Occurrences of the word Copyright (case-insensitive, counted per line)
printf '%s' 'NCOPYRIGHT: '
grep -ci copyright <"$file2"

# C files: NUL-separated list of the .c files under the given paths
find "$@" -name '*.c' -type f -print0 >"$file2"

# Newline-separated copy of the list, used for counting
tr '\0' '\n' <"$file2" >"$file3"

# Number of C files
printf '%s' 'NCFILE: '
wc -l <"$file3"

# Number of distinct names of directories that directly contain C files
printf '%s' 'NCDIR: '
sed -e 's,/[^/]*$,,' -e 's,^.*/,,' <"$file3" |
  sort -u |
  wc -l

# C code: concatenated contents of the listed files
xargs -0 /bin/cat <"$file2" >"$file3"

# Lines and characters, as "lines:characters"
printf '%s' 'CLINESCHAR: '
wc -lc <"$file3" |
  awk '{OFS=":"; print $1, $2}'

# C code without comments and strings: # and escaped backslashes/quotes become
# @, quoted literals are emptied, and cpp -P processes the result
sed -e 's/#/@/g' \
  -e 's/\\[\\"'\'']/@/g' \
  -e 's/"[^"]*"/""/g' \
  -e "s/'[^']*'/''/g" <"$file3" |
  cpp -P >"$file4"

# Number of functions (lines that start with an opening brace)
printf '%s' 'NFUNCTION: '
grep -c '^{' <"$file4"

# Number of gotos
printf '%s' 'NGOTO: '
grep -cw goto <"$file4"

# Occurrences of the register keyword
printf '%s' 'NREGISTER: '
grep -cw register <"$file4"

# Number of macro definitions that take parameters
printf '%s' 'NMACRO: '
grep -c '@[ ]*define[ ][ ]*[a-zA-Z_][a-zA-Z0-9_]*(' <"$file4"

# Number of include directives
printf '%s' 'NINCLUDE: '
grep -c '@[ ]*include' <"$file4"

# Number of constants
printf '%s' 'NCONST: '
grep -ohw '[0-9][x0-9][0-9a-f]*' <"$file4" | wc -l


# Header files
printf '%s' 'NHFILE: '
find "$@" -name '*.h' -type f |
  wc -l
35 changes: 35 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/4.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash

## Create a private scratch directory for the intermediate files; abort if it
## cannot be created, and remove it on every exit path.
scratch_dir=$(mktemp -d) || exit 1
trap 'rm -rf -- "$scratch_dir"' EXIT
## file1: "filename md5sum" pairs sorted by MD5 sum
## file2: MD5 sums that occur more than once
## (A third temporary file used to be created here but nothing read or wrote
## it, so it is no longer allocated.)
file1=$scratch_dir/file1
file2=$scratch_dir/file2

# List the regular files under the given paths and hash them. openssl prints
# lines of the form
#   MD5(filename)= 811bfd4b5974f39e986ddc037e1899e7
# which sed converts into "filename md5sum" pairs; the pairs are then sorted
# by MD5 sum.
# NOTE(review): file names are passed through xargs and split on whitespace by
# the later stages, so names containing blanks are presumably not handled;
# confirm whether that matters for the benchmark inputs.
find "$@" -type f |
  xargs openssl md5 |
  sed -e 's/^MD5(//' -e 's/)= / /' |
  sort -k2 >"$file1"

# Print an MD5 sum for each file that appears more than once
awk '{print $2}' "$file1" | uniq -d >"$file2"

# Join the repeated MD5 sums with the corresponding file names, giving
# "md5sum filename" records, and output the names that share a sum on a
# single line.
join -2 2 "$file2" "$file1" |
  awk '
    BEGIN { ORS = "" }
    {
      # A different sum than on the previous record ends the current line.
      if (prev && $1 != prev) print "\n"
      # Every name except the very first is preceded by a blank.
      if (prev) print " "
      prev = $1
      print $2
    }
    END { if (prev) print "\n" }'
Loading
Loading