diff --git a/notebooks/llmgraph_example.ipynb b/notebooks/llmgraph_example.ipynb new file mode 100644 index 0000000..10018aa --- /dev/null +++ b/notebooks/llmgraph_example.ipynb @@ -0,0 +1,905 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "JarYwM4FxV5x" + }, + "source": [ + "# llmgraph\n", + "\n", + "[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dylanhogg/llmgraph/blob/master/notebooks/llmgraph_example.ipynb)\n", + "\n", + "Create knowledge graphs with LLMs.\n", + "\n", + "https://github.com/dylanhogg/llmgraph\n", + "\n", + "\"drawing\"\n", + "\n", + "llmgraph enables you to create knowledge graphs in [GraphML](http://graphml.graphdrawing.org/), [GEXF](https://gexf.net/), and HTML formats (generated via [pyvis](https://github.com/WestHealth/pyvis)) from a given source entity Wikipedia page. 
The knowledge graphs are generated by extracting world knowledge from ChatGPT or other large language models (LLMs) as supported by [LiteLLM](https://github.com/BerriAI/litellm).\n", + "\n", + "For a background on knowledge graphs see a [youtube overview by Computerphile](https://www.youtube.com/watch?v=PZBm7M0HGzw)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZsrezNA9LppM" + }, + "source": [ + "## Install llmgraph" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "Og6vjqOqxO9X" + }, + "outputs": [], + "source": [ + "# Install llmgraph from pypi (https://pypi.org/project/llmgraph/)\n", + "# (Ignore any dependency resolver issues on Google Colab, they're fine)\n", + "%pip install llmgraph -q" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "_dwPBofNxP0z", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "84d515e3-6483-4e15-8412-8cd4f5f8c5a2" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "llmgraph 1.2.1\n" + ] + } + ], + "source": [ + "# Display installed llmgraph version\n", + "%pip list | grep llmgraph" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cpVE4aQjLraA" + }, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "cN81mjiLzL5H" + }, + "outputs": [], + "source": [ + "import IPython\n", + "import os\n", + "import getpass\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uuq7Q3AULwXx" + }, + "source": [ + "## Enter your OpenAI API Key" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "uXaylu7pzjqV", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4f573ee2-b154-43c9-a555-6b1c4809a96d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter your OpenAI API Key··········\n" + ] + } + ], + 
"source": [ + "# Set OPENAI_API_KEY from user input (hidden in UI via getpass function)\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API Key\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8gY_-vhLL4Uf" + }, + "source": [ + "## Run llmgraph command" + ] + }, + { + "cell_type": "code", + "source": [ + "!llmgraph --help" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p-s6hACUKyeN", + "outputId": "174747c5-0faa-4969-c85b-600b0249330b" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m \u001b[0m\n", + "\u001b[1m \u001b[0m\u001b[1;33mUsage: \u001b[0m\u001b[1mllmgraph [OPTIONS] ENTITY_TYPE ENTITY_WIKIPEDIA\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", + "\u001b[1m \u001b[0m\n", + " Create knowledge graphs with LLMs \n", + " \n", + "\u001b[2m╭─\u001b[0m\u001b[2m Arguments \u001b[0m\u001b[2m─────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[2m─╮\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[31m*\u001b[0m entity_type \u001b[1;33mTEXT\u001b[0m Entity type (e.g. 
movie) \u001b[2m[default: None]\u001b[0m \u001b[2;31m[required]\u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[31m*\u001b[0m entity_wikipedia \u001b[1;33mTEXT\u001b[0m Full wikipedia link to root entity \u001b[2m[default: None]\u001b[0m \u001b[2;31m[required]\u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", + "\u001b[2m╭─\u001b[0m\u001b[2m Options \u001b[0m\u001b[2m───────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[2m─╮\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-entity\u001b[0m\u001b[1;36m-root\u001b[0m \u001b[1;33mTEXT \u001b[0m Optional root entity \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m name override if \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m different from \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m wikipedia page title \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2m[default: None] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-levels\u001b[0m \u001b[1;33mINTEGER \u001b[0m Number of levels deep \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m to construct from the \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m central root entity \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2m[default: 2] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-max\u001b[0m\u001b[1;36m-sum-total-tokens\u001b[0m \u001b[1;33mINTEGER \u001b[0m Maximum sum of tokens \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m for graph generation \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2m[default: 200000] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-output\u001b[0m\u001b[1;36m-folder\u001b[0m \u001b[1;33mTEXT \u001b[0m Folder location to \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m write outputs \u001b[2m│\u001b[0m\n", + 
"\u001b[2m│\u001b[0m \u001b[2m[default: ./_output/] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-llm\u001b[0m\u001b[1;36m-model\u001b[0m \u001b[1;33mTEXT \u001b[0m The model name \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2m[default: \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2mgpt-3.5-turbo] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-llm\u001b[0m\u001b[1;36m-temp\u001b[0m \u001b[1;33mFLOAT \u001b[0m LLM temperature value \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2m[default: 0.0] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-llm\u001b[0m\u001b[1;36m-base-url\u001b[0m \u001b[1;33mTEXT \u001b[0m LLM will use custom \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m base URL instead of \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m the automatic one \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2m[default: None] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-allow\u001b[0m\u001b[1;36m-user-input\u001b[0m \u001b[1;35m-\u001b[0m\u001b[1;35m-no\u001b[0m\u001b[1;35m-allow-user-input\u001b[0m \u001b[1;33m \u001b[0m Allow command line \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m user input \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2m[default: \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2mallow-user-input] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-version\u001b[0m \u001b[1;33m \u001b[0m Display llmgraph \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m version \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-install\u001b[0m\u001b[1;36m-completion\u001b[0m 
\u001b[1;2;33m[\u001b[0m\u001b[1;33mbash\u001b[0m\u001b[1;2;33m|\u001b[0m\u001b[1;33mzsh\u001b[0m\u001b[1;2;33m|\u001b[0m\u001b[1;33mfish\u001b[0m\u001b[1;2;33m|\u001b[0m\u001b[1;33mpowers\u001b[0m Install completion for \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;33mhell\u001b[0m\u001b[1;2;33m|\u001b[0m\u001b[1;33mpwsh\u001b[0m\u001b[1;2;33m]\u001b[0m\u001b[1;33m \u001b[0m the specified shell. \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2m[default: None] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-show\u001b[0m\u001b[1;36m-completion\u001b[0m \u001b[1;2;33m[\u001b[0m\u001b[1;33mbash\u001b[0m\u001b[1;2;33m|\u001b[0m\u001b[1;33mzsh\u001b[0m\u001b[1;2;33m|\u001b[0m\u001b[1;33mfish\u001b[0m\u001b[1;2;33m|\u001b[0m\u001b[1;33mpowers\u001b[0m Show completion for \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;33mhell\u001b[0m\u001b[1;2;33m|\u001b[0m\u001b[1;33mpwsh\u001b[0m\u001b[1;2;33m]\u001b[0m\u001b[1;33m \u001b[0m the specified shell, \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m to copy it or \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m customize the \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m installation. \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[2m[default: None] \u001b[0m \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m \u001b[1;36m-\u001b[0m\u001b[1;36m-help\u001b[0m \u001b[1;33m \u001b[0m Show this message and \u001b[2m│\u001b[0m\n", + "\u001b[2m│\u001b[0m exit. 
\u001b[2m│\u001b[0m\n", + "\u001b[2m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "wSux5AnAxP25", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "093e9602-d635-4092-b4aa-bfafbc64ce6f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Running with \u001b[33mentity_type\u001b[0m=\u001b[32m'concepts-general'\u001b[0m, \n", + "\u001b[33mentity_wikipedia\u001b[0m=\u001b[32m'https://en.wikipedia.org/wiki/Large_language_model'\u001b[0m, \u001b[33mentity_root\u001b[0m=\u001b[32m'Large language \u001b[0m\n", + "\u001b[32mmodel'\u001b[0m, \u001b[33mcustom_entity_root\u001b[0m=\u001b[3;91mFalse\u001b[0m, \u001b[33mlevels\u001b[0m=\u001b[1;36m3\u001b[0m, \u001b[33mllm_model\u001b[0m=\u001b[32m'gpt-3.5-turbo'\u001b[0m, \u001b[33mllm_temp\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.0\u001b[0m, \n", + "\u001b[33moutput_folder\u001b[0m=\u001b[32m'./_output/'\u001b[0m\n", + "Reading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Large_language_model\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mLarge language model\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m1\u001b[0m, total tokens \u001b[1;36m0\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Transformer_\u001b[0m\u001b[1;4;32m(\u001b[0m\u001b[1;4;32mmachine_learning_model\u001b[0m\u001b[1;4;32m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/BERT_\u001b[0m\u001b[1;4;32m(\u001b[0m\u001b[1;4;32mlanguage_model\u001b[0m\u001b[1;4;32m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/GPT_\u001b[0m\u001b[1;4;32m(\u001b[0m\u001b[1;4;32mlanguage_model\u001b[0m\u001b[1;4;32m)\u001b[0m\n", + "\u001b[2KReading 
\u001b[1;4;32mhttps://en.wikipedia.org/wiki/XLNet_\u001b[0m\u001b[1;4;32m(\u001b[0m\u001b[1;4;32mlanguage_model\u001b[0m\u001b[1;4;32m)\u001b[0m \u001b[1m(\u001b[0m\u001b[1;36m404\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/T5_\u001b[0m\u001b[1;4;32m(\u001b[0m\u001b[1;4;32mlanguage_model\u001b[0m\u001b[1;4;32m)\u001b[0m \u001b[1m(\u001b[0m\u001b[1;36m404\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mTransformer \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mmachine learning model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m2\u001b[0m, total tokens \u001b[1;36m610\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/OpenAI#GPT_series\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/XLNet\u001b[0m \u001b[1m(\u001b[0m\u001b[1;36m404\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/GPT-3\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/T5_\u001b[0m\u001b[1;4;32m(\u001b[0m\u001b[1;4;32mtext-to-text_transfer_transformer\u001b[0m\u001b[1;4;32m)\u001b[0m \u001b[1m(\u001b[0m\u001b[1;36m404\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mBERT \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m2\u001b[0m, total tokens \u001b[1;36m1\u001b[0m,\u001b[1;36m228\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/OpenAI#GPT\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/ELMo\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Word2vec\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/ULMFiT\u001b[0m \u001b[1m(\u001b[0m\u001b[1;36m404\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mGPT 
\u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m2\u001b[0m, total tokens \u001b[1;36m1\u001b[0m,\u001b[1;36m850\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/OpenAI\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Transformer_\u001b[0m\u001b[1;4;32m(\u001b[0m\u001b[1;4;32mneural_network\u001b[0m\u001b[1;4;32m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Language_modeling\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mXLNet \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m2\u001b[0m, total tokens \u001b[1;36m2\u001b[0m,\u001b[1;36m420\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/RoBERTa\u001b[0m \u001b[1m(\u001b[0m\u001b[1;36m404\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/ALBERT_\u001b[0m\u001b[1;4;32m(\u001b[0m\u001b[1;4;32mlanguage_model\u001b[0m\u001b[1;4;32m)\u001b[0m \u001b[1m(\u001b[0m\u001b[1;36m404\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mT5 \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m2\u001b[0m, total tokens \u001b[1;36m2\u001b[0m,\u001b[1;36m995\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/OpenAI#GPT-3\u001b[0m\n", + "\u001b[2KOutput html: \n", + "\u001b[32m'_output/concepts-general/large-language-model/concepts-general_large-language-model_v1.2.1_level2_i\u001b[0m\n", + "\u001b[32mncl_unprocessed.html'\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m2\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mGPT \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel 
\u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m3\u001b[0m,\u001b[1;36m505\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Recurrent_neural_network\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Deep_learning\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Natural_language_processing\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mXLNet \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m4\u001b[0m,\u001b[1;36m132\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mGPT-\u001b[0m\u001b[1;32m3\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m4\u001b[0m,\u001b[1;36m736\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/OpenAI#GPT-2\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Artificial_intelligence\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mT5 \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m5\u001b[0m,\u001b[1;36m220\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mGPT \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m5\u001b[0m,\u001b[1;36m765\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mELMo \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m6\u001b[0m,\u001b[1;36m333\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading 
\u001b[1;4;32mhttps://en.wikipedia.org/wiki/FastText\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mWord2Vec \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mword embedding model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m6\u001b[0m,\u001b[1;36m850\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/GloVe_\u001b[0m\u001b[1;4;32m(\u001b[0m\u001b[1;4;32mmachine_learning\u001b[0m\u001b[1;4;32m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Doc2vec\u001b[0m \u001b[1m(\u001b[0m\u001b[1;36m404\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mULMFiT \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m7\u001b[0m,\u001b[1;36m377\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mOpenAI\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m7\u001b[0m,\u001b[1;36m944\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Machine_learning\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Reinforcement_learning\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mTransformer \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mneural network\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m8\u001b[0m,\u001b[1;36m400\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mLanguage modeling\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m8\u001b[0m,\u001b[1;36m881\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Statistical_language_modeling\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Machine_translation\u001b[0m\n", + "\u001b[2KReading 
\u001b[1;4;32mhttps://en.wikipedia.org/wiki/Speech_recognition\u001b[0m\n", + "\u001b[2KReading \u001b[1;4;32mhttps://en.wikipedia.org/wiki/Information_retrieval\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mRoBERTa \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m9\u001b[0m,\u001b[1;36m397\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mALBERT \u001b[0m\u001b[1;32m(\u001b[0m\u001b[1;32mlanguage model\u001b[0m\u001b[1;32m)\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m9\u001b[0m,\u001b[1;36m919\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing \u001b[1;32mGPT-\u001b[0m\u001b[1;32m3\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m, total tokens \u001b[1;36m10\u001b[0m,\u001b[1;36m497\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KOutput html: \n", + "\u001b[32m'_output/concepts-general/large-language-model/concepts-general_large-language-model_v1.2.1_level3_i\u001b[0m\n", + "\u001b[32mncl_unprocessed.html'\u001b[0m \u001b[1m(\u001b[0mlevel \u001b[1;36m3\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2KProcessing level 3: \u001b[35m 100%\u001b[0m \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3/3 \u001b[0m [ \u001b[33m0:02:28\u001b[0m < \u001b[36m0:00:00\u001b[0m , \u001b[31m? 
it/s\u001b[0m ]\n", + "\u001b[?25h\n", + "\u001b[1;32mllmgraph finished, took \u001b[0m\u001b[1;32m148.\u001b[0m\u001b[1;32m994283s.\u001b[0m\n", + "Output written to folder \u001b[32m'_output/concepts-general/large-language-model'\u001b[0m which includes, for each \n", + "level:\n", + " - An html file with only processed nodes as a fully connected graph\n", + " - An html file with both processed and extra unprocessed edge nodes\n", + " - A .graphml file \u001b[1m(\u001b[0msee \u001b[4;94mhttp://graphml.graphdrawing.org/\u001b[0m\u001b[4;94m)\u001b[0m\n", + " - A .gefx file, good for viewing in gephi \u001b[1m(\u001b[0msee \u001b[4;94mhttps://gexf.net/\u001b[0m and \u001b[4;94mhttps://gephi.org/\u001b[0m\u001b[4;94m)\u001b[0m\n", + "\n", + "Thank you for using llmgraph! Please consider starring the project on github: \n", + "\u001b[4;94mhttps://github.com/dylanhogg/llmgraph\u001b[0m\n" + ] + } + ], + "source": [ + "# Run llmgraph\n", + "# Note: valid `entity_type` values are found here: https://github.com/dylanhogg/llmgraph/blob/main/llmgraph/prompts.yaml\n", + "!llmgraph concepts-general https://en.wikipedia.org/wiki/Large_language_model --levels 3 --llm-model gpt-3.5-turbo --llm-temp 0.0 --no-allow-user-input" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y4-g6rk6L_B5" + }, + "source": [ + "## Locate the output files" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "pDuAtHMq9ZaX", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "33b02dd3-4149-402e-e98f-dd7f278bd1c4" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "_output/concepts-general/large-language-model/concepts-general_large-language-model_v1.2.1_level3_fully_connected.html\n", + "_output/concepts-general/large-language-model/concepts-general_large-language-model_v1.2.1_level3.graphml\n" + ] + } + ], + "source": [ + "# Get lists of graph html and graphml files from the _output folder\n", + "html_files = 
[]\n", + "graphml_files = []\n", + "for root, dirs, files in os.walk(\"_output\"):\n", + " if not dirs:\n", + " html_files.extend([str(Path(root) / f) for f in files if f.endswith(\"fully_connected.html\")])\n", + " graphml_files.extend([str(Path(root) / f) for f in files if f.endswith(\".graphml\")])\n", + "html_files = sorted(html_files)\n", + "graphml_files = sorted(graphml_files)\n", + "html_file = html_files[-1]\n", + "graphml_file = graphml_files[-1]\n", + "\n", + "print(html_file)\n", + "print(graphml_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "z1UZdk2WJXWF" + }, + "outputs": [], + "source": [ + "# Uncomment these lines to download the graph html (or find it in the file tree on the left)\n", + "# from google.colab import files\n", + "# files.download(html_file)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Display the network" + ], + "metadata": { + "id": "RFixDPuJo11k" + } + }, + { + "cell_type": "code", + "source": [ + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "from pyvis.network import Network" + ], + "metadata": { + "id": "7egMzj8jfAV6" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Load graphml file\n", + "G = nx.read_graphml(graphml_file)\n", + "# G = nx.read_graphml(\"_output/concepts-general/large-language-model/concepts-general_large-language-model_v1.2.1_level3.graphml\")\n", + "\n", + "# Create pyvis network for displaying\n", + "nt = Network(height=\"800px\", width=\"100%\", directed=True, cdn_resources=\"remote\", notebook=True)\n", + "nt.from_nx(G)\n", + "nt.force_atlas_2based(\n", + " spring_strength=0.03\n", + ")" + ], + "metadata": { + "id": "hPGvtkhTlV33" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Display pyvis network\n", + "nt.save_graph(\"llmgraph.html\")\n", + "IPython.display.HTML(filename=\"llmgraph.html\")" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/", + "height": 827 + }, + "id": "p62WN_oclV8X", + "outputId": "49d0e7e6-4f35-4314-9219-86ae31088e88" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "

\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + "
\n", + "

\n", + "
\n", + " \n", + " \n", + "\n", + "\n", + " \n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "\n", + " \n", + " \n", + "\n", + " \n", + " \n", + "" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "DDdCVOn4lV-Q" + }, + "execution_count": 11, + "outputs": [] + } + ] +}