diff --git a/anon.ipynb b/anon.ipynb deleted file mode 100644 index e8951b6..0000000 --- a/anon.ipynb +++ /dev/null @@ -1,320 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from presidio_analyzer import AnalyzerEngine\n", - "from presidio_anonymizer import AnonymizerEngine" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# text=\"My phone number is 212-555-5555\"\n", - "\n", - "text = \"\"\"\n", - "Srikar Kashyap Pulipaka Email : spulipa@iu.edu\n", - "https://linkedin.com/in/srikarkashyap Mobile : +1 812 778 5178\n", - "Education\n", - "• Indiana University Bloomington, IN, US\n", - "Master of Science in Computer Science; GPA: 3.9/4 Aug 2022 - May 2024 (expected)\n", - "◦ Courses: Applied Algorithms, Computer Networks, Applied Machine Learning. Ongoing: Software Engineering,\n", - "Computer Vision, Graph Analytics.\n", - "• K L University Amaravati, India\n", - "Bachelor of Technology in Computer Science; GPA: 8.44/10 Jul 2014 - May 2018\n", - "◦ Courses: Data Structures and Algorithms, Operating Systems, Discrete Mathematics, Artificial Intelligence.\n", - "◦ Honors: University Innovation Fellow at Stanford University. Department Gold Medalist for Best Outgoing\n", - "Student (2014-18). Member of Student Academic Council. Associate President of Department Student Body.\n", - "Experience\n", - "• Heritage Foods Limited Hyderabad, India\n", - "Deputy Manager (Data Scientist and Software Generalist) Oct 2020 - Jul 2022 (Oct 2020 to Sep 2021 contract)\n", - "◦ REST API development: Developed a REST API for an internal mobile application in Flask. Included data\n", - "validation, type validation, JSON input and output. Deployed on Heroku and actively used by the application.\n", - "(Python - flask, Postman, SQL)\n", - "◦ Forecast Model: Complete data science project life cycle including requirements gathering, data collection and\n", - "understanding, exploratory analysis, modeling and presentation. Developed a commodity price prediction model\n", - "with a Mean Absolute Percentage Error (MAPE) of 3%. Used Prophet routine along with python stack (pandas,\n", - "plotly, jupyter) for testing.\n", - "◦ Customer Analytics: Complete data science life cycle. Analyzes vast amounts of data of customers (over 10\n", - "million rows) every month to assign grades to customers. Conducted multiple adoption sessions for users across\n", - "India and explained how the system works and its usage. (Python, SQL, Tableau, Excel)\n", - "Projects\n", - "Automation Software Projects\n", - "Developed multiple software automation scripts consuming external APIs for cost saving and strategic\n", - "planning purposes. Saved the company around 15,00,000 rupees ($20,000) in annual recurring costs and helped\n", - "design Sales strategy by identifying high performing locations. (Python)\n", - "Manufacturing Deviations\n", - "Developed a Python application that reads a TCP stream to detect manufacturing deviations and alert the\n", - "teams. Setup automatic backups and logging for easier debugging. Deployed across India and actively used\n", - "handling over 30,000 events per day. (Python, SQL, Tableau).\n", - "Capstone Project - Machine Translation of English Videos into Regional Indian languages using\n", - "Open Innovation\n", - "Machine Translation of English Videos into Regional Indian languages using Open Innovation: Using free APIs\n", - "to translate English educational videos into regional languages of India using multithreading. Achieved a\n", - "translation time of 70 seconds for a 3 minute video. Winner of the UC Berkeley Open Innovation Hackathon\n", - "2017. Upgraded program in 2022 to use OpenAI Whisper.\n", - "Anti-Spam filter and classifier\n", - "Designed and developed an Anti-Spam email classifier and filter. Uses Naive Bayes algorithm for predicting\n", - "spam emails. Developed a User Interface that takes a user email as the input and provides a prediction.\n", - "Skills\n", - "◦ Languages, Frameworks and Tools: Python, SQL, Java, C, Flask, Tableau.\n", - "◦ Libraries and Skills: Machine Learning, Deep Learning, Natural Language Processing, Computer Vision,\n", - "Scikit-learn, Pandas, Numpy, Keras, TensorFlow, NLTK, SpaCy, Requests, Design Thinking.\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[type: EMAIL_ADDRESS, start: 33, end: 47, score: 1.0, type: PERSON, start: 1, end: 15, score: 0.85, type: LOCATION, start: 142, end: 153, score: 0.85, type: LOCATION, start: 159, end: 161, score: 0.85, type: LOCATION, start: 412, end: 417, score: 0.85, type: LOCATION, start: 849, end: 858, score: 0.85, type: LOCATION, start: 860, end: 865, score: 0.85, type: PERSON, start: 1497, end: 1504, score: 0.85, type: LOCATION, start: 1795, end: 1800, score: 0.85, type: PERSON, start: 1865, end: 1872, score: 0.85, type: LOCATION, start: 2411, end: 2416, score: 0.85, type: LOCATION, start: 2486, end: 2493, score: 0.85, type: LOCATION, start: 2781, end: 2786, score: 0.85, type: LOCATION, start: 3277, end: 3281, score: 0.85, type: LOCATION, start: 3286, end: 3291, score: 0.85, type: LOCATION, start: 3293, end: 3300, score: 0.85, type: PERSON, start: 3459, end: 3464, score: 0.85, type: PHONE_NUMBER, start: 95, end: 110, score: 0.75]\n" - ] - } - ], - "source": [ - "# Set up the engine, loads the NLP module (spaCy model by default) \n", - "# and other PII recognizers\n", - "analyzer = AnalyzerEngine()\n", - "\n", - "# Call analyzer to get results\n", - "results = analyzer.analyze(text=text,\n", - " entities=[\"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"PERSON\", \"LOCATION\", \"CREDIT_CARD\", \"DOMAIN_NAME\", \"IP_ADDRESS\", \"IBAN_CODE\", \"US_SSN\", \"US_DRIVER_LICENSE\", \"US_PASSPORT\"],\n", - " language='en')\n", - "print(results)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "text: \n", - " Pulipaka Email : \n", - "https://linkedin.com/in/srikarkashyap Mobile : \n", - "Education\n", - "• Indiana University , IN, \n", - "Master of Science in Computer Science; GPA: 3.9/4 Aug 2022 - May 2024 (expected)\n", - "◦ Courses: Applied Algorithms, Computer Networks, Applied Machine Learning. Ongoing: Software Engineering,\n", - "Computer Vision, Graph Analytics.\n", - "• K L University Amaravati, \n", - "Bachelor of Technology in Computer Science; GPA: 8.44/10 Jul 2014 - May 2018\n", - "◦ Courses: Data Structures and Algorithms, Operating Systems, Discrete Mathematics, Artificial Intelligence.\n", - "◦ Honors: University Innovation Fellow at Stanford University. Department Gold Medalist for Best Outgoing\n", - "Student (2014-18). Member of Student Academic Council. Associate President of Department Student Body.\n", - "Experience\n", - "• Heritage Foods Limited , \n", - "Deputy Manager (Data Scientist and Software Generalist) Oct 2020 - Jul 2022 (Oct 2020 to Sep 2021 contract)\n", - "◦ REST API development: Developed a REST API for an internal mobile application in Flask. Included data\n", - "validation, type validation, JSON input and output. Deployed on Heroku and actively used by the application.\n", - "(Python - flask, Postman, SQL)\n", - "◦ Forecast Model: Complete data science project life cycle including requirements gathering, data collection and\n", - "understanding, exploratory analysis, modeling and presentation. Developed a commodity price prediction model\n", - "with a Mean Absolute Percentage Error (MAPE) of 3%. Used routine along with python stack (pandas,\n", - "plotly, jupyter) for testing.\n", - "◦ Customer Analytics: Complete data science life cycle. Analyzes vast amounts of data of customers (over 10\n", - "million rows) every month to assign grades to customers. Conducted multiple adoption sessions for users across\n", - " and explained how the system works and its usage. (Python, SQL, , Excel)\n", - "Projects\n", - "Automation Software Projects\n", - "Developed multiple software automation scripts consuming external APIs for cost saving and strategic\n", - "planning purposes. Saved the company around 15,00,000 rupees ($20,000) in annual recurring costs and helped\n", - "design Sales strategy by identifying high performing locations. (Python)\n", - "Manufacturing Deviations\n", - "Developed a Python application that reads a TCP stream to detect manufacturing deviations and alert the\n", - "teams. Setup automatic backups and logging for easier debugging. Deployed across and actively used\n", - "handling over 30,000 events per day. (Python, SQL, ).\n", - "Capstone Project - Machine Translation of English Videos into Regional Indian languages using\n", - "Open Innovation\n", - "Machine Translation of English Videos into Regional Indian languages using Open Innovation: Using free APIs\n", - "to translate English educational videos into regional languages of using multithreading. Achieved a\n", - "translation time of 70 seconds for a 3 minute video. Winner of the UC Berkeley Open Innovation Hackathon\n", - "2017. Upgraded program in 2022 to use OpenAI Whisper.\n", - "Anti-Spam filter and classifier\n", - "Designed and developed an Anti-Spam email classifier and filter. Uses Naive Bayes algorithm for predicting\n", - "spam emails. Developed a User Interface that takes a user email as the input and provides a prediction.\n", - "Skills\n", - "◦ Languages, Frameworks and Tools: Python, SQL, , C, , .\n", - "◦ Libraries and Skills: Machine Learning, Deep Learning, Natural Language Processing, Computer Vision,\n", - "Scikit-learn, Pandas, Numpy, Keras, TensorFlow, NLTK, , Requests, Design Thinking.\n", - "\n", - "items:\n", - "[\n", - " {'start': 3505, 'end': 3513, 'entity_type': 'PERSON', 'text': '', 'operator': 'replace'},\n", - " {'start': 3336, 'end': 3346, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 3324, 'end': 3334, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 3309, 'end': 3319, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 2808, 'end': 2818, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 2510, 'end': 2520, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 2430, 'end': 2440, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 1883, 'end': 1891, 'entity_type': 'PERSON', 'text': '', 'operator': 'replace'},\n", - " {'start': 1808, 'end': 1818, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 1509, 'end': 1517, 'entity_type': 'PERSON', 'text': '', 'operator': 'replace'},\n", - " {'start': 867, 'end': 877, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 855, 'end': 865, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 413, 'end': 423, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 152, 'end': 162, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 136, 'end': 146, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\n", - " {'start': 90, 'end': 104, 'entity_type': 'PHONE_NUMBER', 'text': '', 'operator': 'replace'},\n", - " {'start': 27, 'end': 42, 'entity_type': 'EMAIL_ADDRESS', 'text': '', 'operator': 'replace'},\n", - " {'start': 1, 'end': 9, 'entity_type': 'PERSON', 'text': '', 'operator': 'replace'}\n", - "]\n", - "\n" - ] - } - ], - "source": [ - "# Analyzer results are passed to the AnonymizerEngine for anonymization\n", - "\n", - "anonymizer = AnonymizerEngine()\n", - "\n", - "anonymized_text = anonymizer.anonymize(text=text,analyzer_results=results)\n", - "\n", - "print(anonymized_text)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "import spacy" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "nlp = spacy.load(\"en_core_web_lg\")" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "ename": "RuntimeError", - "evalue": "[E896] There was an error using the static vectors. Ensure that the vectors of the vocab are properly initialized, or set 'include_static_vectors' to False.", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\spacy\\ml\\staticvectors.py:56\u001b[0m, in \u001b[0;36mforward\u001b[1;34m(model, docs, is_train)\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 56\u001b[0m vectors_data \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mops\u001b[39m.\u001b[39;49mgemm(V, W, trans2\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[0;32m 57\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mValueError\u001b[39;00m:\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\thinc\\backends\\numpy_ops.pyx:101\u001b[0m, in \u001b[0;36mthinc.backends.numpy_ops.NumpyOps.gemm\u001b[1;34m()\u001b[0m\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\blis\\py.pyx:74\u001b[0m, in \u001b[0;36mblis.py.gemm\u001b[1;34m()\u001b[0m\n", - "\u001b[1;31mValueError\u001b[0m: Shape mismatch for blis.gemm: (662, 0), (300, 96)", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[27], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m text \u001b[39m=\u001b[39m nlp(text)\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\spacy\\language.py:1016\u001b[0m, in \u001b[0;36mLanguage.__call__\u001b[1;34m(self, text, disable, component_cfg)\u001b[0m\n\u001b[0;32m 1014\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(Errors\u001b[39m.\u001b[39mE109\u001b[39m.\u001b[39mformat(name\u001b[39m=\u001b[39mname)) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n\u001b[0;32m 1015\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m-> 1016\u001b[0m error_handler(name, proc, [doc], e)\n\u001b[0;32m 1017\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(doc, Doc):\n\u001b[0;32m 1018\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(Errors\u001b[39m.\u001b[39mE005\u001b[39m.\u001b[39mformat(name\u001b[39m=\u001b[39mname, returned_type\u001b[39m=\u001b[39m\u001b[39mtype\u001b[39m(doc)))\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\spacy\\util.py:1689\u001b[0m, in \u001b[0;36mraise_error\u001b[1;34m(proc_name, proc, docs, e)\u001b[0m\n\u001b[0;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mraise_error\u001b[39m(proc_name, proc, docs, e):\n\u001b[1;32m-> 1689\u001b[0m \u001b[39mraise\u001b[39;00m e\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\spacy\\language.py:1011\u001b[0m, in \u001b[0;36mLanguage.__call__\u001b[1;34m(self, text, disable, component_cfg)\u001b[0m\n\u001b[0;32m 1009\u001b[0m error_handler \u001b[39m=\u001b[39m proc\u001b[39m.\u001b[39mget_error_handler()\n\u001b[0;32m 1010\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1011\u001b[0m doc \u001b[39m=\u001b[39m proc(doc, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mcomponent_cfg\u001b[39m.\u001b[39mget(name, {})) \u001b[39m# type: ignore[call-arg]\u001b[39;00m\n\u001b[0;32m 1012\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 1013\u001b[0m \u001b[39m# This typically happens if a component is not initialized\u001b[39;00m\n\u001b[0;32m 1014\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(Errors\u001b[39m.\u001b[39mE109\u001b[39m.\u001b[39mformat(name\u001b[39m=\u001b[39mname)) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\spacy\\pipeline\\trainable_pipe.pyx:56\u001b[0m, in \u001b[0;36mspacy.pipeline.trainable_pipe.TrainablePipe.__call__\u001b[1;34m()\u001b[0m\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\spacy\\util.py:1689\u001b[0m, in \u001b[0;36mraise_error\u001b[1;34m(proc_name, proc, docs, e)\u001b[0m\n\u001b[0;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mraise_error\u001b[39m(proc_name, proc, docs, e):\n\u001b[1;32m-> 1689\u001b[0m \u001b[39mraise\u001b[39;00m e\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\spacy\\pipeline\\trainable_pipe.pyx:52\u001b[0m, in \u001b[0;36mspacy.pipeline.trainable_pipe.TrainablePipe.__call__\u001b[1;34m()\u001b[0m\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\spacy\\pipeline\\tok2vec.py:125\u001b[0m, in \u001b[0;36mTok2Vec.predict\u001b[1;34m(self, docs)\u001b[0m\n\u001b[0;32m 123\u001b[0m width \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodel\u001b[39m.\u001b[39mget_dim(\u001b[39m\"\u001b[39m\u001b[39mnO\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 124\u001b[0m \u001b[39mreturn\u001b[39;00m [\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodel\u001b[39m.\u001b[39mops\u001b[39m.\u001b[39malloc((\u001b[39m0\u001b[39m, width)) \u001b[39mfor\u001b[39;00m doc \u001b[39min\u001b[39;00m docs]\n\u001b[1;32m--> 125\u001b[0m tokvecs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel\u001b[39m.\u001b[39;49mpredict(docs)\n\u001b[0;32m 126\u001b[0m \u001b[39mreturn\u001b[39;00m tokvecs\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\thinc\\model.py:315\u001b[0m, in \u001b[0;36mModel.predict\u001b[1;34m(self, X)\u001b[0m\n\u001b[0;32m 311\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mpredict\u001b[39m(\u001b[39mself\u001b[39m, X: InT) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m OutT:\n\u001b[0;32m 312\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Call the model's `forward` function with `is_train=False`, and return\u001b[39;00m\n\u001b[0;32m 313\u001b[0m \u001b[39m only the output, instead of the `(output, callback)` tuple.\u001b[39;00m\n\u001b[0;32m 314\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 315\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_func(\u001b[39mself\u001b[39;49m, X, is_train\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)[\u001b[39m0\u001b[39m]\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\thinc\\layers\\chain.py:55\u001b[0m, in \u001b[0;36mforward\u001b[1;34m(model, X, is_train)\u001b[0m\n\u001b[0;32m 53\u001b[0m callbacks \u001b[39m=\u001b[39m []\n\u001b[0;32m 54\u001b[0m \u001b[39mfor\u001b[39;00m layer \u001b[39min\u001b[39;00m model\u001b[39m.\u001b[39mlayers:\n\u001b[1;32m---> 55\u001b[0m Y, inc_layer_grad \u001b[39m=\u001b[39m layer(X, is_train\u001b[39m=\u001b[39;49mis_train)\n\u001b[0;32m 56\u001b[0m callbacks\u001b[39m.\u001b[39mappend(inc_layer_grad)\n\u001b[0;32m 57\u001b[0m X \u001b[39m=\u001b[39m Y\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\thinc\\model.py:291\u001b[0m, in \u001b[0;36mModel.__call__\u001b[1;34m(self, X, is_train)\u001b[0m\n\u001b[0;32m 288\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__call__\u001b[39m(\u001b[39mself\u001b[39m, X: InT, is_train: \u001b[39mbool\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[OutT, Callable]:\n\u001b[0;32m 289\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Call the model's `forward` function, returning the output and a\u001b[39;00m\n\u001b[0;32m 290\u001b[0m \u001b[39m callback to compute the gradients via backpropagation.\"\"\"\u001b[39;00m\n\u001b[1;32m--> 291\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_func(\u001b[39mself\u001b[39;49m, X, is_train\u001b[39m=\u001b[39;49mis_train)\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\thinc\\layers\\chain.py:55\u001b[0m, in \u001b[0;36mforward\u001b[1;34m(model, X, is_train)\u001b[0m\n\u001b[0;32m 53\u001b[0m callbacks \u001b[39m=\u001b[39m []\n\u001b[0;32m 54\u001b[0m \u001b[39mfor\u001b[39;00m layer \u001b[39min\u001b[39;00m model\u001b[39m.\u001b[39mlayers:\n\u001b[1;32m---> 55\u001b[0m Y, inc_layer_grad \u001b[39m=\u001b[39m layer(X, is_train\u001b[39m=\u001b[39;49mis_train)\n\u001b[0;32m 56\u001b[0m callbacks\u001b[39m.\u001b[39mappend(inc_layer_grad)\n\u001b[0;32m 57\u001b[0m X \u001b[39m=\u001b[39m Y\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\thinc\\model.py:291\u001b[0m, in \u001b[0;36mModel.__call__\u001b[1;34m(self, X, is_train)\u001b[0m\n\u001b[0;32m 288\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__call__\u001b[39m(\u001b[39mself\u001b[39m, X: InT, is_train: \u001b[39mbool\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[OutT, Callable]:\n\u001b[0;32m 289\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Call the model's `forward` function, returning the output and a\u001b[39;00m\n\u001b[0;32m 290\u001b[0m \u001b[39m callback to compute the gradients via backpropagation.\"\"\"\u001b[39;00m\n\u001b[1;32m--> 291\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_func(\u001b[39mself\u001b[39;49m, X, is_train\u001b[39m=\u001b[39;49mis_train)\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\thinc\\layers\\concatenate.py:44\u001b[0m, in \u001b[0;36mforward\u001b[1;34m(model, X, is_train)\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(model: Model[InT, OutT], X: InT, is_train: \u001b[39mbool\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[OutT, Callable]:\n\u001b[1;32m---> 44\u001b[0m Ys, callbacks \u001b[39m=\u001b[39m \u001b[39mzip\u001b[39m(\u001b[39m*\u001b[39m[layer(X, is_train\u001b[39m=\u001b[39mis_train) \u001b[39mfor\u001b[39;00m layer \u001b[39min\u001b[39;00m model\u001b[39m.\u001b[39mlayers])\n\u001b[0;32m 45\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(Ys[\u001b[39m0\u001b[39m], \u001b[39mlist\u001b[39m):\n\u001b[0;32m 46\u001b[0m data_l, backprop \u001b[39m=\u001b[39m _list_forward(model, X, Ys, callbacks, is_train)\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\thinc\\layers\\concatenate.py:44\u001b[0m, in \u001b[0;36m\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(model: Model[InT, OutT], X: InT, is_train: \u001b[39mbool\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[OutT, Callable]:\n\u001b[1;32m---> 44\u001b[0m Ys, callbacks \u001b[39m=\u001b[39m \u001b[39mzip\u001b[39m(\u001b[39m*\u001b[39m[layer(X, is_train\u001b[39m=\u001b[39;49mis_train) \u001b[39mfor\u001b[39;00m layer \u001b[39min\u001b[39;00m model\u001b[39m.\u001b[39mlayers])\n\u001b[0;32m 45\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(Ys[\u001b[39m0\u001b[39m], \u001b[39mlist\u001b[39m):\n\u001b[0;32m 46\u001b[0m data_l, backprop \u001b[39m=\u001b[39m _list_forward(model, X, Ys, callbacks, is_train)\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\thinc\\model.py:291\u001b[0m, in \u001b[0;36mModel.__call__\u001b[1;34m(self, X, is_train)\u001b[0m\n\u001b[0;32m 288\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__call__\u001b[39m(\u001b[39mself\u001b[39m, X: InT, is_train: \u001b[39mbool\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[OutT, Callable]:\n\u001b[0;32m 289\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Call the model's `forward` function, returning the output and a\u001b[39;00m\n\u001b[0;32m 290\u001b[0m \u001b[39m callback to compute the gradients via backpropagation.\"\"\"\u001b[39;00m\n\u001b[1;32m--> 291\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_func(\u001b[39mself\u001b[39;49m, X, is_train\u001b[39m=\u001b[39;49mis_train)\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\spacy\\ml\\staticvectors.py:58\u001b[0m, in \u001b[0;36mforward\u001b[1;34m(model, docs, is_train)\u001b[0m\n\u001b[0;32m 56\u001b[0m vectors_data \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39mops\u001b[39m.\u001b[39mgemm(V, W, trans2\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m 57\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mValueError\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(Errors\u001b[39m.\u001b[39mE896)\n\u001b[0;32m 59\u001b[0m \u001b[39mif\u001b[39;00m vocab\u001b[39m.\u001b[39mvectors\u001b[39m.\u001b[39mmode \u001b[39m==\u001b[39m Mode\u001b[39m.\u001b[39mdefault:\n\u001b[0;32m 60\u001b[0m \u001b[39m# Convert negative indices to 0-vectors\u001b[39;00m\n\u001b[0;32m 61\u001b[0m \u001b[39m# TODO: more options for UNK tokens\u001b[39;00m\n\u001b[0;32m 62\u001b[0m vectors_data[rows \u001b[39m<\u001b[39m \u001b[39m0\u001b[39m] \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n", - "\u001b[1;31mRuntimeError\u001b[0m: [E896] There was an error using the static vectors. Ensure that the vectors of the vocab are properly initialized, or set 'include_static_vectors' to False." - ] - } - ], - "source": [ - "text = nlp(text)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "for ent in text.ents:\n", - " if ent.label_ == \"PERSON\":\n", - " # replace the entity with \"PRIVATE\" if it is a person's name\n", - " anonymized_text = str(anonymized_text).replace(ent.text, \"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"text: \\n Pulipaka Email : \\nhttps://linkedin.com/in/srikarkashyap Mobile : \\nEducation\\n• Indiana University , IN, \\nMaster of Science in Computer Science; GPA: 3.9/4 Aug 2022 - May 2024 (expected)\\n◦ Courses: Applied , Computer Networks, Applied Machine Learning. Ongoing: ,\\nComputer Vision, .\\n• K L University Amaravati, \\nBachelor of Technology in Computer Science; GPA: 8.44/10 \\n◦ Courses: Data Structures and , Operating Systems, Discrete Mathematics, Artificial Intelligence.\\n◦ Honors: University Innovation Fellow at Stanford University. Department Gold Medalist for Best Outgoing\\nStudent (2014-18). Member of Student Academic Council. Associate President of Department Student Body.\\nExperience\\n• Heritage Foods Limited , \\nDeputy Manager (Data Scientist and ) Oct 2020 - Jul 2022 (Oct 2020 to Sep 2021 contract)\\n◦ REST API development: Developed a REST API for an internal mobile application in Flask. Included data\\nvalidation, type validation, input and output. Deployed on Heroku and actively used by the application.\\n(Python - flask, Postman, SQL)\\n◦ Forecast Model: Complete data science project life cycle including requirements gathering, data collection and\\nunderstanding, exploratory analysis, modeling and presentation. Developed a commodity price prediction model\\nwith a Mean Absolute Percentage Error (MAPE) of 3%. Used routine along with python stack (pandas,\\nplotly, jupyter) for testing.\\n◦ Customer Analytics: Complete data science life cycle. Analyzes vast amounts of data of customers (over 10\\nmillion rows) every month to assign grades to customers. Conducted multiple adoption sessions for users across\\n and explained how the system works and its usage. (Python, SQL, , Excel)\\nProjects\\nAutomation Software Projects\\nDeveloped multiple software automation scripts consuming external APIs for cost saving and strategic\\nplanning purposes. Saved the company around 15,00,000 rupees ($20,000) in annual recurring costs and helped\\ndesign Sales strategy by identifying high performing locations. (Python)\\nManufacturing Deviations\\nDeveloped a Python application that reads a TCP stream to detect manufacturing deviations and alert the\\nteams. Setup automatic backups and logging for easier debugging. Deployed across and actively used\\nhandling over 30,000 events per day. (Python, SQL, ).\\nCapstone Project - Machine Translation of English Videos into Regional Indian languages using\\nOpen Innovation\\nMachine Translation of English Videos into Regional Indian languages using Open Innovation: Using free APIs\\nto translate English educational videos into regional languages of using multithreading. Achieved a\\ntranslation time of 70 seconds for a 3 minute video. of the UC Berkeley Open Innovation Hackathon\\n2017. Upgraded program in 2022 to use OpenAI Whisper.\\n filter and classifier\\nDesigned and developed an email classifier and filter. Uses Naive Bayes algorithm for predicting\\nspam emails. Developed a User Interface that takes a user email as the input and provides a prediction.\\nSkills\\n◦ Languages, Frameworks and Tools: Python, SQL, , C, , .\\n◦ Libraries and Skills: Machine Learning, , Natural Language Processing, Computer Vision,\\nScikit-learn, Pandas, Numpy, Keras, TensorFlow, NLTK, , Requests, Design Thinking.\\n\\nitems:\\n[\\n {'start': 3505, 'end': 3513, 'entity_type': 'PERSON', 'text': '', 'operator': 'replace'},\\n {'start': 3336, 'end': 3346, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 3324, 'end': 3334, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 3309, 'end': 3319, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 2808, 'end': 2818, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 2510, 'end': 2520, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 2430, 'end': 2440, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 1883, 'end': 1891, 'entity_type': 'PERSON', 'text': '', 'operator': 'replace'},\\n {'start': 1808, 'end': 1818, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 1509, 'end': 1517, 'entity_type': 'PERSON', 'text': '', 'operator': 'replace'},\\n {'start': 867, 'end': 877, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 855, 'end': 865, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 413, 'end': 423, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 152, 'end': 162, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 136, 'end': 146, 'entity_type': 'LOCATION', 'text': '', 'operator': 'replace'},\\n {'start': 90, 'end': 104, 'entity_type': 'PHONE_NUMBER', 'text': '', 'operator': 'replace'},\\n {'start': 27, 'end': 42, 'entity_type': 'EMAIL_ADDRESS', 'text': '', 'operator': 'replace'},\\n {'start': 1, 'end': 9, 'entity_type': 'PERSON', 'text': '', 'operator': 'replace'}\\n]\\n\"" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "anonymized_text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/app.py b/app.py index d37b502..4d4df9a 100644 --- a/app.py +++ b/app.py @@ -144,7 +144,7 @@ def questions(): answer = {"questions": []} return flask.render_template('index.html', error=True) end_gpt = time.time() - durations['GPT4 API Request'] = round(end_gpt - start_gpt, 3) + durations['GPT API Request'] = round(end_gpt - start_gpt, 3) return flask.render_template('questions.html', questions=dict(answer)['questions'], durations=durations) diff --git a/templates/index.html b/templates/index.html index 83c265b..fe39552 100644 --- a/templates/index.html +++ b/templates/index.html @@ -17,7 +17,7 @@ - Resume --> Interview Questions using GPT4 + Resume --> Interview Questions using GPT @@ -81,7 +81,7 @@

Estimated time: 30 seconds to 1 minute

-

ResumeQuestionsGPT: Generate Interview Questions from your resume using GPT4

+

ResumeQuestionsGPT: Generate Interview Questions from your resume using GPT

diff --git a/templates/questions.html b/templates/questions.html index 1e6777f..23fc0fb 100644 --- a/templates/questions.html +++ b/templates/questions.html @@ -47,7 +47,7 @@

ResumeQuestionsGPT: Probable Questions

-

Based on your resume, GPT4 has generated the following probable questions that you may be asked in an interview.

+

Based on your resume, GPT has generated the following probable questions that you may be asked in an interview.