forked from jldbc/gutenberg
-
Notifications
You must be signed in to change notification settings - Fork 0
/
populate_postgres.sql
54 lines (49 loc) · 1.47 KB
/
populate_postgres.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/*
Note: rows containing non-Unicode characters must be removed before populating
the database. Locally I did this by filtering for those characters in Excel and
dropping those rows. TODO: move this step into the preprocessing script before
shipping to the web app.
*/
-- Create the target database. This statement errors if "gutenberg" already
-- exists, so the script is intended for a fresh setup.
CREATE DATABASE gutenberg;

-- psql meta-command (not SQL): switch the session to the new database so the
-- statements that follow run inside it.
\connect gutenberg
-- One row per source text file. Column order matches the column list used by
-- the COPY statement that loads the CSV.
CREATE TABLE features (
    -- Identification and size.
    id                INTEGER,
    file_name         TEXT,
    total_words       INTEGER,
    avg_sentence_size REAL,

    -- One REAL per punctuation/symbol character.
    -- NOTE(review): presumably a frequency or rate per text; confirm the units
    -- against the preprocessing script.
    exclamation       REAL,
    pound_sign        REAL,
    quotes            REAL,
    pct_sign          REAL,
    dollar_sign       REAL,
    and_symbol        REAL,
    parentheses       REAL,
    plus_sign         REAL,
    asterisk          REAL,
    dash              REAL,
    comma             REAL,
    backslash         REAL,
    period            REAL,
    semicolon         REAL,
    colon             REAL,
    equals_sign       REAL,
    lessthan          REAL,
    question_mark     REAL,
    at_symbol         REAL,
    bracket           REAL,
    underscore        REAL,
    upward_carrot     REAL,
    apostrophe        REAL,
    squiggle_bracket  REAL,
    tilde             REAL,

    -- NOTE(review): these look like sentiment scores (negative / neutral /
    -- positive / compound); confirm against the preprocessing script.
    neg               REAL,
    neu               REAL,
    pos               REAL,
    compound          REAL,

    -- Bibliographic metadata.
    title             TEXT,
    author            TEXT
);
-- Bulk-load the feature CSV into the features table. The first line of the
-- file is treated as a header and skipped. COPY ... FROM 'path' reads the file
-- on the database server, so the path must be readable by the server process.
COPY features (
    id,
    file_name,
    total_words,
    avg_sentence_size,
    exclamation,
    pound_sign,
    quotes,
    pct_sign,
    dollar_sign,
    and_symbol,
    parentheses,
    plus_sign,
    asterisk,
    dash,
    comma,
    backslash,
    period,
    semicolon,
    colon,
    equals_sign,
    lessthan,
    question_mark,
    at_symbol,
    bracket,
    underscore,
    upward_carrot,
    apostrophe,
    squiggle_bracket,
    tilde,
    neg,
    neu,
    pos,
    compound,
    title,
    author
)
FROM '/Users/jamesledoux/Documents/gutenberg/features_attempt.csv'
WITH (
    FORMAT csv,
    HEADER true,
    DELIMITER ',',
    ENCODING 'UTF8'
);
-- Spot check after the load: list the rows attributed to one author.
-- Queries features, the only table this script creates in the new database
-- (the previous table name, feats_full_test, is not defined by this script).
-- SELECT * is kept deliberately: this is an ad-hoc inspection query.
SELECT *
FROM features
WHERE author = 'William Shakespeare';