From 7219675d2e53d18132c3e2628c3dfc6c29840c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Sch=C3=B6lzel?= Date: Wed, 20 Mar 2024 22:27:44 +0100 Subject: [PATCH] adds a few ChatGPT-generated exercises --- notes/chat_gpt_prompts.md | 95 ++++++++++++++++++ .../cosine_similarity_raw.md | 96 +++++++++++++++++++ notes/raw_chatgpt_responses/is_anagram_raw.md | 65 +++++++++++++ notes/raw_chatgpt_responses/ngram_raw.md | 95 ++++++++++++++++++ notes/raw_chatgpt_responses/pig_latin_raw.md | 73 ++++++++++++++ .../vowel_counter_raw.md | 74 ++++++++++++++ .../word_frequency_raw.md | 68 +++++++++++++ notes/raw_chatgpt_responses/word_wrap_raw.md | 73 ++++++++++++++ 8 files changed, 639 insertions(+) create mode 100644 notes/chat_gpt_prompts.md create mode 100644 notes/raw_chatgpt_responses/cosine_similarity_raw.md create mode 100644 notes/raw_chatgpt_responses/is_anagram_raw.md create mode 100644 notes/raw_chatgpt_responses/ngram_raw.md create mode 100644 notes/raw_chatgpt_responses/pig_latin_raw.md create mode 100644 notes/raw_chatgpt_responses/vowel_counter_raw.md create mode 100644 notes/raw_chatgpt_responses/word_frequency_raw.md create mode 100644 notes/raw_chatgpt_responses/word_wrap_raw.md diff --git a/notes/chat_gpt_prompts.md b/notes/chat_gpt_prompts.md new file mode 100644 index 0000000..bed8d37 --- /dev/null +++ b/notes/chat_gpt_prompts.md @@ -0,0 +1,95 @@ +# ChatGPT prompts for exercise generation + +## Base prompt + +You are a teacher of a introductory Python course for computational linguists. You use type hinting and Python features up to Python 3.8. Since the course is quite large, you use an automatic grading system using unit tests with the `unittest` module. + +Please write a programming exercise for your students. Be precise in the wording. The programming exercise should take no more than 1 h to complete. Students should write a function that is then tested with a unit test. Please also provide the code for the unit test. + +For your answer, please use the same structure as the following example delimited by and : + + + +--- +title: Batchify +author: +- ChatGPT +- Christopher Schölzel +keywords: +- language: python +- course: Introduction to Python for computational linguists +lang: en-US +solution-size: 8 # measured in lines of code +id: 5111a717-e313-49b5-b95f-8bec5a07e30e +--- + +# Batchify + +Implement a function `batchify(input: str, batchSize: int) -> List[str]` in `batchify.py` which gets a string as input, as well as a number `batchSize`. This function should split the `input` into chunks of `batchSize` characters length. + +Example: + +```python +result = batchify("ABCDEFGH",4) +print(result) # -> ['ABCD','EFGH'] + +result = batchify("ABCDEFGH",3) +print(result) # -> ['ABC', 'DEF', 'GH'] +``` + + +> Tip: Use the slicing operator. + +Make sure to save the function in a file called `batchify.py` so that it can be imported with `from batchify import batchify`. + + + + +```python +import unittest + +from batchify import batchify + +class TestSplitting(unittest.TestCase): + + def testBatch4(self): + input = "Hallo Welt!" + result = batchify(input, 4) + self.assertEqual(["Hall", "o We", "lt!"], result) + + def testBatch2(self): + input = "Hallo Welt!" 
+ result = batchify(input, 2) + self.assertEqual(["Ha","ll", "o ", "We", "lt", "!"], result) + +if __name__ == '__main__': + unittest.main() +``` + + + + +```python +from typing import List + +def batchify(input: str, batchSize: int) -> List[str]: + batches = [] + for i in range(0, len(input), batchSize): + batches.append(input[i: i + batchSize]) + return batches +``` + + + +Please answer with the same structure as above for the following type of exercise: Create a ngram class that can generate an ngram model and make predictions based on it. + +## Variants of final Paragraph + +* Please answer with the same structure as above but choose a different exercise. +* Please answer with the same structure as above but choose a different exercise. + The exercise should be more complex than the example. +* Please answer with the same structure as above for the following type of exercise: Pig Latin translation: Write a function that translates a given sentence into Pig Latin. + +## Prompts for exercise ideas + +Please give me examples of programming exercises for an introductory Python course for computational linguists. The exercises should require to write a single function or class. Please list 10 easy, 10 medium, and 10 hard examples. Just write one sentence per example that gives a general idea what the exercise should be about. The exercises should only require to use the Python standard library and take no more than one hour. \ No newline at end of file diff --git a/notes/raw_chatgpt_responses/cosine_similarity_raw.md b/notes/raw_chatgpt_responses/cosine_similarity_raw.md new file mode 100644 index 0000000..8a2a48a --- /dev/null +++ b/notes/raw_chatgpt_responses/cosine_similarity_raw.md @@ -0,0 +1,96 @@ + + +--- +title: Sentence Similarity +author: +- ChatGPT +- Christopher Schölzel +keywords: +- language: python +- course: Introduction to Python for computational linguists +lang: en-US +solution-size: 11 # measured in lines of code +id: 5111a717-e313-49b5-b95f-8bec5a07e31f +--- + +# Sentence Similarity + +Implement a function `sentence_similarity(sentence1: str, sentence2: str) -> float` in `sentence_similarity.py` which calculates the cosine similarity between two input sentences. + +The cosine similarity between two vectors A and B is calculated as: + +\[ \text{similarity} = \frac{A \cdot B}{\|A\| \times \|B\|} \] + +Where \(A \cdot B\) is the dot product of A and B, and \(\|A\|\) and \(\|B\|\) are the magnitudes of vectors A and B, respectively. For sentences, you'll treat them as Bag-of-Words vectors, where each word is a dimension and the value is the frequency of the word in the sentence. + +Example: + +```python +similarity = sentence_similarity("I like bananas", "I like apples") +print(similarity) # Output should be approximately 0.7071067811865475 +``` + +You can ignore punctuation and consider words case-insensitively. + +> Tip: You can use the `Counter` class from the `collections` module to count word frequencies. + +Make sure to save the function in a file called `sentence_similarity.py` so that it can be imported with `from sentence_similarity import sentence_similarity`. 
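+To make the formula concrete, here is a small worked example on a hypothetical pair of sentences (not one of the graded test cases): for A = "cats like fish" and B = "dogs like fish", the bag-of-words vectors share the words "like" and "fish", so \(A \cdot B = 2\), while \(\|A\| = \|B\| = \sqrt{3}\), giving a similarity of \(2/3 \approx 0.667\). The snippet below sketches this computation with `collections.Counter`; it is only an illustration of the idea, not the reference solution.
+
+```python
+from collections import Counter
+import math
+
+# Hypothetical example sentences (not part of the graded tests).
+a = Counter("cats like fish".lower().split())  # {'cats': 1, 'like': 1, 'fish': 1}
+b = Counter("dogs like fish".lower().split())  # {'dogs': 1, 'like': 1, 'fish': 1}
+
+# Dot product over the words that both bags contain.
+dot = sum(a[w] * b[w] for w in a.keys() & b.keys())  # 1*1 + 1*1 = 2
+
+# Euclidean magnitudes of the two count vectors.
+mag_a = math.sqrt(sum(c * c for c in a.values()))  # sqrt(3)
+mag_b = math.sqrt(sum(c * c for c in b.values()))  # sqrt(3)
+
+print(dot / (mag_a * mag_b))  # -> 0.6666...
+```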
+ + + + +```python +import unittest +from sentence_similarity import sentence_similarity + +class TestSimilarity(unittest.TestCase): + + def test_similarity_same_sentence(self): + sentence1 = "I like bananas" + similarity = sentence_similarity(sentence1, sentence1) + self.assertEqual(1.0, similarity) + + def test_similarity_different_sentences(self): + sentence1 = "I like bananas" + sentence2 = "I like apples" + similarity = sentence_similarity(sentence1, sentence2) + self.assertAlmostEqual(0.7071067811865475, similarity, places=5) + + def test_similarity_different_order(self): + sentence1 = "I like bananas" + sentence2 = "bananas like I" + similarity = sentence_similarity(sentence1, sentence2) + self.assertEqual(1.0, similarity) + +if __name__ == '__main__': + unittest.main() +``` + + + + +```python +from typing import List +from collections import Counter +import math + +def sentence_similarity(sentence1: str, sentence2: str) -> float: + def vectorize(sentence): + words = sentence.lower().split() + word_counts = Counter(words) + return word_counts + + vec1 = vectorize(sentence1) + vec2 = vectorize(sentence2) + + intersection = set(vec1.keys()) & set(vec2.keys()) + dot_product = sum(vec1[word] * vec2[word] for word in intersection) + + magnitude1 = math.sqrt(sum(vec1[word] ** 2 for word in vec1)) + magnitude2 = math.sqrt(sum(vec2[word] ** 2 for word in vec2)) + + similarity = dot_product / (magnitude1 * magnitude2) + return similarity +``` + + \ No newline at end of file diff --git a/notes/raw_chatgpt_responses/is_anagram_raw.md b/notes/raw_chatgpt_responses/is_anagram_raw.md new file mode 100644 index 0000000..69063ab --- /dev/null +++ b/notes/raw_chatgpt_responses/is_anagram_raw.md @@ -0,0 +1,65 @@ + + +--- +title: Anagram Checker +author: +- ChatGPT +- Christopher Schölzel +keywords: +- language: python +- course: Introduction to Python for computational linguists +lang: en-US +solution-size: 10 # measured in lines of code +id: 93853c04-1782-4ed1-8275-1f3f934d635e +--- + +# Anagram Checker + +Create a function called `is_anagram(word1: str, word2: str) -> bool` in `anagram.py` that checks if two input strings are anagrams of each other. An anagram is a word or phrase formed by rearranging the letters of a different word or phrase, typically using all the original letters exactly once. The function should return `True` if the two words are anagrams, and `False` otherwise. + +Example: + +```python +result = is_anagram("listen", "silent") +print(result) # -> True + +result = is_anagram("hello", "world") +print(result) # -> False +``` + +> Hint: You can use Python's Counter class to count occurrences of characters in each word. + +Make sure to save the function in a file called `anagram.py` so that it can be imported with `from anagram import is_anagram`. 
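+For example, `Counter` reduces the check to a single comparison of character counts. The following minimal sketch (an illustration of the hint, not the graded solution) shows the idea on the "listen" / "silent" pair from above:
+
+```python
+from collections import Counter
+
+# Counting characters turns the anagram check into a dictionary comparison.
+print(Counter("listen") == Counter("silent"))  # -> True (same letters, same counts)
+print(Counter("hello") == Counter("world"))    # -> False
+```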
+ + + + +```python +import unittest +from anagram import is_anagram + +class TestAnagram(unittest.TestCase): + + def test_valid_anagram(self): + self.assertTrue(is_anagram("listen", "silent")) + self.assertTrue(is_anagram("rail safety", "fairy tales")) + + def test_invalid_anagram(self): + self.assertFalse(is_anagram("hello", "world")) + self.assertFalse(is_anagram("python", "pythons")) + +if __name__ == '__main__': + unittest.main() +``` + + + + +```python +from collections import Counter + +def is_anagram(word1: str, word2: str) -> bool: + return Counter(word1) == Counter(word2) +``` + + \ No newline at end of file diff --git a/notes/raw_chatgpt_responses/ngram_raw.md b/notes/raw_chatgpt_responses/ngram_raw.md new file mode 100644 index 0000000..f0cb033 --- /dev/null +++ b/notes/raw_chatgpt_responses/ngram_raw.md @@ -0,0 +1,95 @@ + + +--- +title: Ngram Model +author: +- ChatGPT +- Christopher Schölzel +keywords: +- language: python +- course: Introduction to Python for computational linguists +lang: en-US +solution-size: 20 # measured in lines of code +id: 2d9898f0-5e25-40c2-8d7c-f842c01166e1 +--- + +# Ngram Model + +Create a class `NgramModel` in `ngram.py` that can generate an ngram model and make predictions based on it. The class should have the following methods: + +1. `__init__(self, n: int)` - Initialize the ngram model with the value of `n`, the order of the ngrams. +2. `train(self, text: str)` - Train the model using the provided text. This method should build the ngram model. +3. `predict(self, prefix: str) -> List[str]` - Given a prefix, predict the next word based on the trained ngram model. Return a list of possible predictions sorted by their probabilities. + +Example: + +```python +model = NgramModel(2) +model.train("The quick brown fox jumps over the lazy dog") +predictions = model.predict("The quick") +print(predictions) # -> ['brown', 'fox'] +``` + +Make sure to save the class in a file called `ngram.py` so that it can be imported with `from ngram import NgramModel`. 
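+To illustrate what `train` has to build, consider a small hypothetical corpus (not one of the graded test sentences): with n = 3, every run of n - 1 = 2 consecutive words becomes a prefix that maps to the words observed directly after it. The sketch below only shows this mapping with a `defaultdict`; it is not the full class.
+
+```python
+from collections import defaultdict
+
+# Hypothetical training sentence, n = 3 (prefixes are n - 1 = 2 words long).
+words = "the cat sat on the mat".split()
+n = 3
+
+ngrams = defaultdict(list)
+for i in range(len(words) - n + 1):
+    prefix = tuple(words[i:i + n - 1])        # the (n - 1)-word context
+    ngrams[prefix].append(words[i + n - 1])   # the word that followed it
+
+print(dict(ngrams))
+# {('the', 'cat'): ['sat'], ('cat', 'sat'): ['on'],
+#  ('sat', 'on'): ['the'], ('on', 'the'): ['mat']}
+```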
+ + + + +```python +import unittest + +from ngram import NgramModel + +class TestNgramModel(unittest.TestCase): + + def test_ngram_2(self): + model = NgramModel(2) + model.train("The quick brown fox jumps over the lazy dog") + predictions = model.predict("The quick") + self.assertCountEqual(["brown", "fox"], predictions) + + def test_ngram_3(self): + model = NgramModel(3) + model.train("The quick brown fox jumps over the lazy dog") + predictions = model.predict("quick brown") + self.assertCountEqual(["fox"], predictions) + + def test_ngram_4(self): + model = NgramModel(3) + model.train("The quick brown fox jumps over the lazy dog") + predictions = model.predict("jumps over") + self.assertCountEqual(["the"], predictions) + +if __name__ == '__main__': + unittest.main() +``` + + + + +```python +from typing import List, Dict +from collections import defaultdict +import random + +class NgramModel: + def __init__(self, n: int): + self.n = n + self.ngrams = defaultdict(list) + + def train(self, text: str): + words = text.split() + for i in range(len(words) - self.n): + prefix = tuple(words[i:i + self.n - 1]) + next_word = words[i + self.n - 1] + self.ngrams[prefix].append(next_word) + + def predict(self, prefix: str) -> List[str]: + prefix_words = prefix.split() + prefix = tuple(prefix_words[-self.n + 1:]) + possible_next_words = self.ngrams.get(prefix, []) + return sorted(possible_next_words, key=lambda w: self.ngrams[prefix].count(w), reverse=True) + +``` + + \ No newline at end of file diff --git a/notes/raw_chatgpt_responses/pig_latin_raw.md b/notes/raw_chatgpt_responses/pig_latin_raw.md new file mode 100644 index 0000000..ed83e46 --- /dev/null +++ b/notes/raw_chatgpt_responses/pig_latin_raw.md @@ -0,0 +1,73 @@ + + +--- +title: Pig Latin Translation +author: +- ChatGPT +- Christopher Schölzel +keywords: +- language: python +- course: Introduction to Python for computational linguists +lang: en-US +solution-size: 11 # measured in lines of code +id: 5111a717-e313-49b5-b95f-8bec5a07e30e +--- + +# Pig Latin Translation + +Implement a function `pig_latin(sentence: str) -> str` in `piglatin.py` which translates a given sentence into Pig Latin. In Pig Latin, you move the first letter of each word to the end of the word and add "ay". Words that start with a vowel (a, e, i, o, u) simply have "ay" added to the end. + +Example: + +```python +result = pig_latin("hello world") +print(result) # -> "ellohay orldway" +``` + +> Tip: You can use string manipulation methods like `split()` and string slicing. + +Make sure to save the function in a file called `piglatin.py` so that it can be imported with `from piglatin import pig_latin`. 
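+As a worked example for a single word: "hello" starts with a consonant, so the "h" moves to the end and "ay" is appended, giving "ellohay"; "apple" starts with a vowel, so it simply becomes "appleay". The snippet below is a minimal sketch of that per-word step using slicing; it handles one word only, not a whole sentence.
+
+```python
+# Minimal per-word sketch: consonant-initial words move their first letter
+# to the end before "ay"; vowel-initial words just get "ay" appended.
+def pig_latin_word(word: str) -> str:
+    if word[0].lower() in "aeiou":
+        return word + "ay"
+    return word[1:] + word[0] + "ay"
+
+print(pig_latin_word("hello"))  # -> ellohay
+print(pig_latin_word("apple"))  # -> appleay
+```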
+ + + + +```python +import unittest + +from piglatin import pig_latin + +class TestPigLatin(unittest.TestCase): + + def testPigLatin(self): + sentence = "hello world" + result = pig_latin(sentence) + self.assertEqual("ellohay orldway", result) + + def testPigLatin2(self): + sentence = "python is fun" + result = pig_latin(sentence) + self.assertEqual("ythonpay isay unfay", result) + +if __name__ == '__main__': + unittest.main() +``` + + + + +```python +def pig_latin(sentence: str) -> str: + vowels = "aeiou" + words = sentence.split() + pig_latin_words = [] + + for word in words: + if word[0].lower() in vowels: + pig_latin_words.append(word + "ay") + else: + pig_latin_words.append(word[1:] + word[0] + "ay") + + return " ".join(pig_latin_words) +``` + + \ No newline at end of file diff --git a/notes/raw_chatgpt_responses/vowel_counter_raw.md b/notes/raw_chatgpt_responses/vowel_counter_raw.md new file mode 100644 index 0000000..701e4ea --- /dev/null +++ b/notes/raw_chatgpt_responses/vowel_counter_raw.md @@ -0,0 +1,74 @@ + + +--- +title: Vowel Counter +author: +- ChatGPT +- Christopher Schölzel +keywords: +- language: python +- course: Introduction to Python for computational linguists +lang: en-US +solution-size: 5 # measured in lines of code +id: 37a7dacc-07ef-4b15-a32e-cd963e252bc7 +--- + +# Vowel Counter + +Write a function `count_vowels(input: str) -> int` in `vowel_counter.py` that takes a string `input` and returns the number of vowels (a, e, i, o, u) in the input string. + +Example: + +```python +result = count_vowels("hello") +print(result) # -> 2 + +result = count_vowels("python") +print(result) # -> 1 +``` + +Make sure to save the function in a file called `vowel_counter.py` so that it can be imported with `from vowel_counter import count_vowels`. + + + + +```python +import unittest + +from vowel_counter import count_vowels + +class TestCountVowels(unittest.TestCase): + + def testNoVowels(self): + input = "rhythm" + result = count_vowels(input) + self.assertEqual(0, result) + + def testAllVowels(self): + input = "aeiou" + result = count_vowels(input) + self.assertEqual(5, result) + + def testMixedCase(self): + input = "AbCdEfGhIj" + result = count_vowels(input) + self.assertEqual(3, result) + +if __name__ == '__main__': + unittest.main() +``` + + + + +```python +def count_vowels(input: str) -> int: + vowels = "aeiou" + count = 0 + for char in input: + if char.lower() in vowels: + count += 1 + return count +``` + + \ No newline at end of file diff --git a/notes/raw_chatgpt_responses/word_frequency_raw.md b/notes/raw_chatgpt_responses/word_frequency_raw.md new file mode 100644 index 0000000..d6681e8 --- /dev/null +++ b/notes/raw_chatgpt_responses/word_frequency_raw.md @@ -0,0 +1,68 @@ + + +--- +title: Word Frequency Counter +author: +- ChatGPT +- Christopher Schölzel +keywords: +- language: python +- course: Introduction to Python for computational linguists +lang: en-US +solution-size: 10 # measured in lines of code +id: 42f3d804-ecb2-4fa0-b5aa-14e330a2f3e7 +--- + +# Word Frequency Counter + +Write a function `word_frequency(text: str) -> dict` in a file named `word_frequency.py`. The function should take a string `text` as input and return a dictionary where keys are words in the text, and values are the frequencies of those words in the text. + +The function should be case-insensitive, meaning "Word" and "word" should be considered the same word. + +Example: + +```python +result = word_frequency("This is a test. 
This is only a test.") +print(result) # -> {'this': 2, 'is': 2, 'a': 2, 'test': 2, 'only': 1} +``` + +> Tip: You can use the `split()` method to split the text into words and `lower()` method to convert words to lowercase. + + + + +```python +import unittest + +from word_frequency import word_frequency + +class TestWordFrequency(unittest.TestCase): + + def testSimpleSentence(self): + input_text = "This is a test. This is only a test." + result = word_frequency(input_text) + expected_result = {'this': 2, 'is': 2, 'a': 2, 'test': 2, 'only': 1} + self.assertEqual(expected_result, result) + + def testEmptyText(self): + input_text = "" + result = word_frequency(input_text) + self.assertEqual({}, result) + +if __name__ == '__main__': + unittest.main() +``` + + + + +```python +def word_frequency(text: str) -> dict: + word_list = text.lower().split() + frequency_dict = {} + for word in word_list: + frequency_dict[word] = frequency_dict.get(word, 0) + 1 + return frequency_dict +``` + + \ No newline at end of file diff --git a/notes/raw_chatgpt_responses/word_wrap_raw.md b/notes/raw_chatgpt_responses/word_wrap_raw.md new file mode 100644 index 0000000..7e5b02e --- /dev/null +++ b/notes/raw_chatgpt_responses/word_wrap_raw.md @@ -0,0 +1,73 @@ + + +--- +title: Word Wrap +author: +- ChatGPT +- Christopher Schölzel +keywords: +- language: python +- course: Introduction to Python for computational linguists +lang: en-US +solution-size: 7 # measured in lines of code +id: e5ad5b7d-aa1b-4a2b-b9f3-75f897cf6d09 +--- + +# Word Wrap + +Implement a function `word_wrap(text: str, columns: int) -> List[str]` in `word_wrap.py` which takes a long string `text` and wraps it to fit within a given number of `columns` without breaking words. + +Example: + +```python +result = word_wrap("Lorem ipsum dolor sit amet, consectetur adipiscing elit.", 20) +print(result) +# Output: +# ['Lorem ipsum dolor', 'sit amet,', 'consectetur', 'adipiscing elit.'] + +result = word_wrap("This is a short text.", 10) +print(result) +# Output: +# ['This is a', 'short text.'] +``` + +> Tip: You may need to use the `textwrap` module. + +Make sure to save the function in a file called `word_wrap.py` so that it can be imported with `from word_wrap import word_wrap`. + + + + +```python +import unittest + +from word_wrap import word_wrap + +class TestWordWrap(unittest.TestCase): + + def test_wrap20(self): + text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit." + result = word_wrap(text, 20) + self.assertEqual(["Lorem ipsum dolor", "sit amet,", "consectetur", "adipiscing elit."], result) + + def test_wrap10(self): + text = "This is a short text." + result = word_wrap(text, 10) + self.assertEqual(["This is a", "short text."], result) + +if __name__ == '__main__': + unittest.main() +``` + + + + +```python +from typing import List +import textwrap + +def word_wrap(text: str, columns: int) -> List[str]: + return textwrap.wrap(text, width=columns) +``` + + \ No newline at end of file