Merge pull request #38 from code4me-me/aral_user_study
Aral's User Study
FrankHeijden authored Mar 18, 2024
2 parents e723b21 + 1b0af53 commit 620bfa3
Showing 15 changed files with 2,907 additions and 596 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -5,4 +5,6 @@ build
venv
__pycache__
users*.json
data_aral

models
1 change: 1 addition & 0 deletions code4me-server/requirements.txt
@@ -8,3 +8,4 @@ nltk~=3.8.1
datasets~=2.9.0
markdown~=3.4.1
joblib~=1.2.0
safetensors
140 changes: 128 additions & 12 deletions code4me-server/src/api.py
@@ -1,22 +1,136 @@
import glob
import json
import os
import uuid
import random
from typing import List
import time
from __future__ import annotations
import os, time, random, json, uuid, glob, torch, traceback

from enum import Enum
from typing import List, Tuple
from model import Model
from datetime import datetime
from joblib import Parallel, delayed
from flask import Blueprint, request, Response, redirect
import torch

from flask import Blueprint, request, Response, redirect, current_app
from limiter import limiter

from user_study import (
filter_request,
store_completion_request,
should_prompt_survey,
USER_STUDY_DIR,
)

v1 = Blueprint("v1", __name__)
v2 = Blueprint("v2", __name__)

os.makedirs("data", exist_ok=True)

def authorise(req) -> str:
    ''' Authorise the request. Raise ValueError if the request is not authorised. '''

    auth = req.authorization.token
    if auth is None:
        raise ValueError("Missing bearer token")
    return auth

def get_predictions(completion_request: dict) -> Tuple[float, dict[str, str]]:
    ''' Return a list of predictions. '''

    prefix = completion_request['prefix'].rstrip()
    suffix = completion_request['suffix']

    def predict_model(model: Model) -> str:
        try:
            return model.value[1](prefix, suffix)[0]
        except torch.cuda.OutOfMemoryError:
            exit(1)

    t0 = datetime.now()
    predictions = Parallel(n_jobs=os.cpu_count(), prefer="threads")(delayed(predict_model)(model) for model in Model)
    time = (datetime.now() - t0).total_seconds() * 1000

    predictions = {model.name: prediction for model, prediction in zip(Model, predictions)}
    return time, predictions

@v2.route("/prediction/autocomplete", methods=["POST"])
@limiter.limit("4000/hour")
def autocomplete_v2():

try:
# TODO: As we want every request to be authorised, this can be extracted into a decorator
user_uuid = authorise(request)
request_json = request.json

# TODO: add a None filter type for baseline comparison
filter_time, filter_type, should_filter = filter_request(user_uuid, request_json)

predict_time, predictions = get_predictions(request_json) \
if (not should_filter) or (request_json['trigger'] == 'manual') \
else (None, {})

log_filter = f'\033[1m{"filter" if should_filter else "predict"}\033[0m'
log_context = f'{request_json["prefix"][-10:]}{request_json["suffix"][:5]}'
current_app.logger.warning(f'{log_filter} {log_context} \t{filter_type} {[v[:10] for v in predictions.values()]}')

verify_token = uuid.uuid4().hex if not should_filter else ''
prompt_survey = should_prompt_survey(user_uuid) if not should_filter else False

store_completion_request(user_uuid, verify_token, {
**request_json,
'timestamp': datetime.now().isoformat(),
'filter_type': filter_type,
'filter_time': filter_time,
'should_filter': should_filter,
'predict_time': predict_time,
'predictions': predictions,
'survey': prompt_survey,
'study_version': '0.0.1'
})

return {
'predictions': predictions,
'verifyToken': verify_token,
'survey': prompt_survey
}

except Exception as e:

error_uuid = uuid.uuid4().hex
current_app.logger.warning(f'''
Error {error_uuid} for {user_uuid if user_uuid is not None else "unauthenticated user"}
{request.json if request.is_json else "no request json found"}
''')
traceback.print_exc()

return response({ "error": error_uuid }, status=400)

@v2.route("/prediction/verify", methods=["POST"])
@limiter.limit("4000/hour")
def verify_v2():

user_uuid = authorise(request)
verify_json = request.json

# current_app.logger.info(verify_json)

verify_token = verify_json['verifyToken']
file_path = os.path.join(USER_STUDY_DIR, user_uuid, f'{verify_token}.json')

with open(file_path, 'r+') as completion_file:
completion_json = json.load(completion_file)

if 'ground_truth' in completion_json:
return response({
"error": "Already used verify token"
}, status=400)

completion_json.update(verify_json)

completion_file.seek(0)
completion_file.write(json.dumps(completion_json))
completion_file.truncate()

return response({'success': True})


##### NOTE: OLD IMPLEMENTATION KEPT FOR JETBRAINS USERS #####
# (and those who have turned off auto-update for VS Code extensions)

@v1.route("/prediction/autocomplete", methods=["POST"])
@limiter.limit("1000/hour")
@@ -85,8 +199,10 @@ def predict_model(model: Model) -> List[str]:
"rightContext": right_context if store_context else None
}))

n_suggestions = len(glob.glob(f"data/{user_token}*.json"))
survey = n_suggestions >= 100 and n_suggestions % 50 == 0
# # # TODO: disabled surveys temporarily, as we are currently looking through >1M files on every request.
# n_suggestions = len(glob.glob(f"data/{user_token}*.json"))
# survey = n_suggestions >= 100 and n_suggestions % 50 == 0
survey = False

return response({
"predictions": unique_predictions,
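For context, a minimal client-side sketch of the new v2 flow added above: request completions, then report the ground truth with the returned verify token. The field names follow the diff; the base URL, port, and token value are placeholder assumptions, not part of this commit.

import requests

BASE_URL = "http://localhost:8080/api/v2"          # assumed host/port, not from this commit
HEADERS = {"Authorization": "Bearer <user-uuid>"}   # authorise() reads the bearer token

# 1. Ask for completions; the server may filter the request instead of predicting.
completion = requests.post(
    f"{BASE_URL}/prediction/autocomplete",
    headers=HEADERS,
    json={"prefix": "def add(a, b):\n    return ", "suffix": "", "trigger": "manual"},
).json()

# 2. Report what the user actually typed, using the verify token from step 1.
if completion.get("verifyToken"):
    requests.post(
        f"{BASE_URL}/prediction/verify",
        headers=HEADERS,
        json={"verifyToken": completion["verifyToken"], "ground_truth": "a + b"},
    )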
10 changes: 6 additions & 4 deletions code4me-server/src/app.py
@@ -1,15 +1,17 @@
from pathlib import Path
import markdown, os

import markdown
from pathlib import Path
from flask import Flask, jsonify, render_template
from api import v1
from api import v1, v2
from limiter import limiter

app = Flask(__name__, static_folder="static", template_folder="templates")
limiter.init_app(app)
app.register_blueprint(v1, url_prefix='/api/v1')
app.register_blueprint(v2, url_prefix='/api/v2')

index_md = markdown.markdown(Path("markdowns/index.md").read_text())
markdown_path = 'markdowns/index.md'
index_md = markdown.markdown(Path(markdown_path).read_text())


@app.errorhandler(429)
12 changes: 8 additions & 4 deletions code4me-server/src/codegpt.py
Expand Up @@ -3,13 +3,16 @@
import os
import torch

# env variable for local testing
CODE4ME_TEST = os.environ.get("CODE4ME_TEST", "false") == "true"

checkpoint_path = "gpt2" # default checkpoint is the non-finetuned gpt2 model

# if CODEGPT_CHECKPOINT_PATH is set, use that checkpoint
if os.environ.get("CODEGPT_CHECKPOINT_PATH"):
checkpoint_path = os.environ.get("CODEGPT_CHECKPOINT_PATH")

if not os.path.exists(checkpoint_path):
if not os.path.exists(checkpoint_path) and not CODE4ME_TEST:
    raise ValueError(f"Invalid checkpoint path: '{checkpoint_path}'")

config = GPT2Config
@@ -23,7 +26,7 @@
class Beam(object):
    def __init__(self, size, sos, eos):
        self.size = size
        self.tt = torch.cuda
        self.tt = torch.cuda if torch.cuda.is_available() else torch
        # The score for each translation on the beam.
        self.scores = self.tt.FloatTensor(size).zero_().to(device)
        # The backpointers at each time-step.
@@ -159,7 +162,8 @@ def DecodeIds(idxs):
break_ids = [tokenizer.sep_token_id]

m = torch.nn.LogSoftmax(dim=-1).to(device)
zero = torch.cuda.LongTensor(1).fill_(0).to(device)
# I presume the .cuda. is not necessary here if it is moved to the CUDA device immediately, but not risking it.
zero = torch.cuda.LongTensor(1).fill_(0).to(device) if not CODE4ME_TEST else torch.LongTensor(1).fill_(0).to(device)

def codegpt_predict(left_context: str, right_context: str) -> List[str]:
    left_context = left_context.replace("\n", "<EOL>")
@@ -179,7 +183,7 @@ def codegpt_predict(left_context: str, right_context: str) -> List[str]:
    inputs = torch.tensor(tokens, device=device).unsqueeze(0)
    with torch.no_grad():
        beam_size = 1
        outputs = model(inputs[:, :-1])[1]
        outputs = model(inputs)[1]
        p = []
        for i in range(inputs.shape[0]):
            past = [torch.cat([x[0].unsqueeze(0), x[1].unsqueeze(0)], dim=0) if type(x) == tuple else x for x in
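The codegpt.py changes above make the beam search and the zero tensor fall back to CPU when CUDA is unavailable or when CODE4ME_TEST is set. For reference, a sketch of the same fallback written with device-agnostic constructors instead of the legacy torch.cuda.* tensor types; this is an illustration of the pattern, not code from this commit.

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

scores = torch.zeros(4, dtype=torch.float, device=device)  # ~ self.tt.FloatTensor(size).zero_().to(device)
zero = torch.zeros(1, dtype=torch.long, device=device)     # ~ torch.cuda.LongTensor(1).fill_(0).to(device)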
21 changes: 21 additions & 0 deletions code4me-server/src/model.py
@@ -1,4 +1,25 @@
import os
from enum import Enum
from typing import Callable

# NOTE: Convenient for testing, use preset generate functions
# if os.getenv("CODE4ME_TEST", "false") == "true":
# print('''
# \033[1m WARNING: RUNNING IN TEST MODE \033[0m
# ''')
# # if the env variable TEST_MODE is set to True, then remap model.generate to lambda: 'model_name'

# incoder = type("InCoder", (object,), {})
# unixcoder_wrapper = type("UniXCoder", (object,), {})
# import codegpt
# # codegpt = type("CodeGPT", (object,), {})

# incoder.generate = lambda left, right: ['predict_incoder']
# unixcoder_wrapper.generate = lambda left, right: [' predict_unixcoder']

# # codegpt.codegpt_predict = lambda left, right: [' (predict_codegpt']
# else:
# # ooh yeah, import statements in an else stmt; i see new things every day
import incoder
import unixcoder_wrapper
import codegpt
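The Model enum itself is not shown in this diff; judging from api.py's use of model.value[1](prefix, suffix) and model.name, it presumably pairs each member with its generate function, roughly as sketched below. Member names and the tuple layout are guesses, not the actual file.

from enum import Enum

import incoder
import unixcoder_wrapper
import codegpt

class Model(Enum):
    # value[0]: numeric id (assumed), value[1]: generate(prefix, suffix) -> List[str]
    InCoder = (0, incoder.generate)
    UniXCoder = (1, unixcoder_wrapper.generate)
    CodeGPT = (2, codegpt.codegpt_predict)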
