Bug fixes and Improvements

iVishalr · Oct 8, 2024 · commit 47c2948 · 1 parent: a21813d

Showing 6 changed files with 40 additions and 28 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -14,7 +14,7 @@ jobs:
   build-cpu:
     strategy:
       matrix:
-        os: [ubuntu-latest]
+        os: [ubuntu-latest, macos-latest]
 
     runs-on: ${{ matrix.os }}
 
@@ -39,8 +39,20 @@ jobs:
 
       - name: Compile Third Party Libraries
         run: |
-          make third_party -j
+          make third_party
 
-      - name: Compile gpt.c
+      - name: Compile gpt.c with GCC
         run: | 
-          make
+          make clean && make
+
+      - name: Compile gpt.c with clang
+        run: |
+          make clean && make CC=clang
+
+      - name: Compile gpt.c with GCC and Debug
+        run: |
+          make clean && make CC=gcc BUILD=debug
+
+      - name: Compile gpt.c with clang and Debug
+        run: |
+          make clean && make CC=clang BUILD=debug
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
-CC ?= gcc
-BUILD ?= release
+CC = gcc
+BUILD = release
 CFLAGS_RELEASE = -O3 -Ofast -march=native -Wno-unused-result -ggdb3 -fPIC
 CFLAGS_DEBUG = -Wno-unused-result -O0 -ggdb3 -fPIC
 
@@ -18,19 +18,24 @@ ifeq "$(CC)" "gcc"
 	CFLAGS_DEBUG += -fopenmp -DOMP
 	LDFLAGS += -lgomp
 endif
+
 ifeq "$(CC)" "clang"
 	LDFLAGS += -lomp
 endif
 
 ifeq "$(PLATFORM)" "Darwin"
-	BREW_PATH=$(shell brew --prefix)
     SHARED_SUFFIX=dylib
+	BREW_PATH=$(shell brew --prefix)
 	INCLUDES += -I $(BREW_PATH)/opt/libomp/include -I $(BREW_PATH)/opt/argp-standalone/include
 	LDFLAGS += -L $(BREW_PATH)/opt/libomp/lib -L $(BREW_PATH)/opt/argp-standalone/lib
 endif
 
 ifeq "$(PLATFORM)" "Linux"
     SHARED_SUFFIX=so
+	ifeq "$(CC)" "clang"
+		CFLAGS_RELEASE += -fopenmp -DOMP
+		CFLAGS_DEBUG += -fopenmp -DOMP
+	endif
 endif
 
 ifeq ($(BUILD), release)

diff --git a/include/tensor.h b/include/tensor.h
@@ -68,9 +68,9 @@ void *matmul(
 
 void mul_(tensor_t *x, const float s);
 void pow_(tensor_t *x, const float p);
-void *tensor_copy(tensor_t *dest, const tensor_t *src);
-void *uniform(tensor_t *tensor, const float low, const float high);
-void *shape(const tensor_t *tensor, char *shape);
+void tensor_copy(tensor_t *dest, const tensor_t *src);
+void uniform(tensor_t *tensor, const float low, const float high);
+void shape(const tensor_t *tensor, char *shape);
 void view(tensor_t *tensor, const int *shape, const int n);
 
 tensor_t *tensor_load(FILE *fp, const int *shape, int n);

diff --git a/infer_gpt.c b/infer_gpt.c
@@ -371,9 +371,9 @@ int main(int argc, char **argv) {
     double inference_time_s = (inference_end.tv_sec - inference_start.tv_sec) + (inference_end.tv_nsec - inference_start.tv_nsec) / 1e9;
 
     if (inference_args.interactive == 0) {
-        int *tokens = (int *)mallocCheck(sizeof(int) * total_tokens);
+        uint32_t *tokens = (uint32_t *)mallocCheck(sizeof(uint32_t) * total_tokens);
         for (int i = 0; i < total_tokens; i++)
-            tokens[i] = (int)X->t[i];
+            tokens[i] = (uint32_t)X->t[i];
 
         uint32_t length = tokenizer->decode_length(tokenizer, tokens, total_tokens);
         char *dest = (char *)malloc(sizeof(char) * length + 1);

diff --git a/src/linear.c b/src/linear.c
@@ -118,10 +118,7 @@ void kaiming_uniform(tensor_t *t, float a, const char *mode, const char *non_lin
     float gain = sqrtf(2.0f / (1.0f + powf(negative_slope, 2.0f)));
     float std = gain / sqrtf(fan);
     float bound = sqrtf(3.0f) * std;
-    if (uniform(t, -bound, bound) == NULL) {
-        printf("An error occured when initializing tensor.\n");
-        return;
-    }
+    uniform(t, -bound, bound);
 }
 
 

diff --git a/src/tensor.c b/src/tensor.c
@@ -36,7 +36,7 @@ float gaussRandom() {
 // https://stackoverflow.com/questions/11641629/generating-a-uniform-distribution-of-integers-in-c
 float rand_uniform(float low, float high)
 {
-    float r = rand() / (1.0f + RAND_MAX);
+    float r = rand() / (1.0f + (float)RAND_MAX);
     float range = high - low + 1;
     float scaled = (r * range) + low;
     return scaled;
@@ -261,10 +261,10 @@ void pow_(tensor_t *x, const float p) {
         x->t[i] = powf(x->t[i], p);
 }
 
-void *tensor_copy(tensor_t *dest, const tensor_t *src) {
+void tensor_copy(tensor_t *dest, const tensor_t *src) {
     if (src == NULL || dest == NULL) {
         printf("Either src or dest ptr is NULL.\n");
-        return NULL;
+        return;
     }
 
     cblas_scopy(src->length, src->t, 1, dest->t, 1);
@@ -274,19 +274,17 @@ void *tensor_copy(tensor_t *dest, const tensor_t *src) {
 
     dest->ndims = src->ndims;
     dest->length = src->length;
-    return dest;
 }
 
-void *uniform(tensor_t *tensor, const float low, const float high) {
+void uniform(tensor_t *tensor, const float low, const float high) {
     if (tensor == NULL) {
         printf("Expected required argument *t to be of type tensor_t, but got NULL.");
-        return NULL;
+        return;
     }
 
     for (int i = 0; i < tensor->length; i++) {
         tensor->t[i] = rand_uniform(low, high);
     }
-    return tensor;
 }
 
 tensor_t *tensor_load(FILE *fp, const int *shape, int n) {
@@ -323,9 +321,9 @@ void free_tensor(tensor_t *tensor) {
     free(tensor);
 }
 
-void *shape(const tensor_t *tensor, char *shape) {
+void shape(const tensor_t *tensor, char *shape) {
     if (tensor == NULL || shape == NULL) {
-        return NULL;
+        return;
     }
 
     int counter = 0;
@@ -336,9 +334,9 @@ void *shape(const tensor_t *tensor, char *shape) {
     shape[counter - 2] = '\0';
 }
 
-void *_shape(const tensor_t *tensor, char *_shape) {
-    return shape(tensor, _shape);
-}
+// void *_shape(const tensor_t *tensor, char *_shape) {
+//     return shape(tensor, _shape);
+// }
 
 void view(tensor_t *tensor, const int *shape, const int n) {
     if (tensor == NULL) {