Skip to content

Commit

Permalink
Try to guess language by file content
Browse files Browse the repository at this point in the history
  • Loading branch information
gandarez committed Aug 16, 2023
1 parent c847afa commit 4e69981
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 9 deletions.
47 changes: 43 additions & 4 deletions pkg/language/chroma.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,50 @@ func detectChromaCustomized(filepath string) (heartbeat.Language, float32, bool)
return language, weight, true
}

// Finally, try matching by file content.
head, err := fileHead(filepath)
if err != nil {
log.Warnf("failed to load head from file %q: %s", filepath, err)
return heartbeat.LanguageUnknown, 0, false
}

if len(head) == 0 {
return heartbeat.LanguageUnknown, 0, false
}

if lexer := analyse(string(head)); lexer != nil {
language, ok := heartbeat.ParseLanguageFromChroma(lexer.Config().Name)
if !ok {
log.Warnf("failed to parse language from chroma lexer name %q", lexer.Config().Name)
return heartbeat.LanguageUnknown, 0, false
}

return language, 0, true
}

return heartbeat.LanguageUnknown, 0, false
}

// analyse runs content-based detection over text and returns the lexer
// whose analyser reports the highest score. Lexers that do not implement
// chroma.Analyser are ignored. A nil lexer is returned when no analyser
// scores above zero; on equal scores the lexer seen first is kept.
// This is a copy of chroma.lexers.internal.api:Analyse().
func analyse(text string) chroma.Lexer {
	var (
		best      chroma.Lexer
		bestScore float32
	)

	for _, candidate := range lexers.Registry.Lexers {
		scorer, ok := candidate.(chroma.Analyser)
		if !ok {
			continue
		}

		// Strictly greater: a later lexer must beat, not match, the current best.
		if score := scorer.AnalyseText(text); score > bestScore {
			best, bestScore = candidate, score
		}
	}

	return best
}

// weightedLexer is a lexer with priority and weight.
type weightedLexer struct {
chroma.Lexer
Expand All @@ -125,7 +166,7 @@ func selectByCustomizedPriority(filepath string, lexers chroma.PrioritisedLexers

extensions, err := loadFolderExtensions(dir)
if err != nil {
log.Warnf("failed to load folder extensions: %s", err)
log.Warnf("failed to load folder files extensions: %s", err)
return lexers[0], 0
}

Expand Down Expand Up @@ -214,9 +255,7 @@ func fileHead(filepath string) ([]byte, error) {
}
}()

data := make([]byte, maxFileSize)

_, err = f.ReadAt(data, 0)
data, err := io.ReadAll(io.LimitReader(f, maxFileSize))
if err != nil && err != io.EOF {
return nil, fmt.Errorf("failed to read bytes from file: %s", err)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/language/language.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func correspondingFileExists(fp string, extension string) bool {
return false
}

// loadFolderExtensions loads all existing from a folder.
// loadFolderExtensions loads all existing file extensions from a folder.
func loadFolderExtensions(dir string) ([]string, error) {
files, err := os.ReadDir(dir)
if err != nil {
Expand Down
16 changes: 12 additions & 4 deletions pkg/language/language_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ func TestDetect_ChromaTopLanguagesRetrofit(t *testing.T) {
"path/to/zshrc",
"path/to/.zshrc",
"path/to/PKGBUILD",
"testdata/bash",
},
Expected: heartbeat.LanguageBash,
},
Expand Down Expand Up @@ -469,8 +470,11 @@ func TestDetect_ChromaTopLanguagesRetrofit(t *testing.T) {
Expected: heartbeat.LanguageINI,
},
"java": {
Filepaths: []string{"path/to/java.java"},
Expected: heartbeat.LanguageJava,
Filepaths: []string{
"path/to/java.java",
"testdata/java",
},
Expected: heartbeat.LanguageJava,
},
"javascript": {
Filepaths: []string{
Expand Down Expand Up @@ -586,8 +590,11 @@ func TestDetect_ChromaTopLanguagesRetrofit(t *testing.T) {
Expected: heartbeat.LanguagePawn,
},
"perl not prolog": {
Filepaths: []string{"testdata/codefiles/chroma_unsupported_top/perl.pl"},
Expected: heartbeat.LanguagePerl,
Filepaths: []string{
"testdata/codefiles/chroma_unsupported_top/perl.pl",
"testdata/perl",
},
Expected: heartbeat.LanguagePerl,
},
"php": {
Filepaths: []string{
Expand Down Expand Up @@ -655,6 +662,7 @@ func TestDetect_ChromaTopLanguagesRetrofit(t *testing.T) {
"path/to/BUILD.bazel",
"path/to/WORKSPACE",
"path/to/file.tac",
"testdata/python3",
},
Expected: heartbeat.LanguagePython,
},
Expand Down
19 changes: 19 additions & 0 deletions pkg/language/testdata/bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash

# Greet the user and ask for their name.
echo "The activity generator"
read -p "What is your name? " name

# Create an array of activities.
activity[0]="Football"
activity[1]="Table Tennis"
activity[2]="8 Ball Pool"
activity[3]="PS5"
activity[4]="Blackjack"

array_length=${#activity[@]} # Store the length of the array
index=$(($RANDOM % $array_length)) # Randomly select an index from 0 to array_length - 1

# Invite the user to take part in the randomly chosen activity and read their answer.
echo "Hi" $name, "would you like to play" ${activity[$index]}"?"
read -p "Answer: " answer
18 changes: 18 additions & 0 deletions pkg/language/testdata/perl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/perl
# Print the path of every entry in foo/bar that is not a directory.
use strict;
use warnings;

use Path::Tiny;

my $dir = path('foo','bar'); # foo/bar

# Iterate over the content of foo/bar
my $iter = $dir->iterator;
while (my $file = $iter->()) {

# See if it is a directory and skip
next if $file->is_dir();

# Print out the file name and path
print "$file\n";
}
4 changes: 4 additions & 0 deletions pkg/language/testdata/python3
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/python3

if __name__ == "__main__":
print("Hello, World!")

0 comments on commit 4e69981

Please sign in to comment.