Skip to content

Commit 36ead66

Browse files
let's try to push to github
1 parent c639201 commit 36ead66

22 files changed

+411
-369
lines changed

1. mirror_tweets_in_neo4j.js

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import * as neo4j from 'neo4j-driver'
2+
import needle from 'needle';
3+
import credentials from './credentials/Twitter.js';
4+
5+
// Globals
6+
const TOKEN = credentials.auth_tokens.BEARER_TOKEN;
7+
const GET_ENDPOINT = "https://api.twitter.com/2/tweets";
8+
const SEARCH_RECENT_ENDPOINT = "https://api.twitter.com/2/tweets/search/recent";
9+
10+
// https://twitter.com/briantylercohen/status/1369403905956847618 <- use this!!!!
11+
// ^ try this one
12+
13+
////////////////////////////////////////////////////////////////////////////////
14+
////////////////////////////////// MAIN //////////////////////////////////////
15+
////////////////////////////////////////////////////////////////////////////////
16+
17+
// Startup database
18+
const driver = neo4j.driver('bolt://localhost:7687', neo4j.auth.basic('neo4j', "password"));
// Module-level session shared by the database helpers further down the file.
const session = driver.session();

// Tweet whose whole conversation is mirrored into the graph.
const rootTweetId = "1372316893739163652";
let rootTweet;

try {
  rootTweet = await addTweetThreadToDb(rootTweetId);
} finally {
  // Close database: release the session first, then the driver, even when
  // mirroring failed part-way through.
  await session.close();
  await driver.close();
}
27+
28+
////////////////////////////////////////////////////////////////////////////////
29+
//////////////////////////// ENDPOINT FUNCTIONS ///////////////////////////////
30+
////////////////////////////////////////////////////////////////////////////////
31+
32+
/**
 * Perform a GET request with the bearer token attached.
 *
 * @param {string} endpointURL - Full URL of the endpoint to call.
 * @param {Object} params - Query parameters handed to needle.
 * @returns {Promise<{body: *, headers: Object}>} Response body and headers.
 * @throws {Error} When the response status is outside 2xx or the body is empty.
 */
async function get(endpointURL, params) {
  const res = await needle('get', endpointURL, params, {
    headers: {
      "authorization": `Bearer ${TOKEN}`,
    },
  });

  // An error response (e.g. 401 or 429) still carries a body, so checking the
  // body alone would hand an error payload back to the caller as a success.
  const { statusCode } = res;
  if (typeof statusCode === 'number' && (statusCode < 200 || statusCode >= 300)) {
    throw new Error(`Unsuccessful request (HTTP ${statusCode})`);
  }

  if (!res.body) {
    throw new Error('Unsuccessful request');
  }

  return { body: res.body, headers: res.headers };
}
47+
48+
/**
 * Look up a single tweet by id through the tweets lookup endpoint.
 *
 * @param {string} id - Tweet id to fetch.
 * @returns {Promise<{body: *, headers: Object}>} Whatever `get` resolves with.
 * @throws {Error} When `id` is null or undefined.
 */
async function getTweet(id) {
  if (id == null) {
    throw new Error("ID not specified in getTweet");
  }

  const params = {
    "ids": id,
    "tweet.fields": "author_id,conversation_id,created_at,entities,lang,referenced_tweets,text",
  };
  return get(GET_ENDPOINT, params);
}
56+
57+
/**
 * Fetch one page of recent-search results for a conversation.
 *
 * @param {string} conversation_id - Conversation id used in the search query.
 * @param {string} [next_token] - Pagination token; omitted for the first page.
 * @returns {Promise<{body: *, headers: Object}>} Whatever `get` resolves with.
 * @throws {Error} When `conversation_id` is null or undefined.
 */
async function recentSearch(conversation_id, next_token) {
  if (conversation_id == null) {
    throw new Error("ID not specified in recentSearch");
  }

  const params = {
    "query": `conversation_id:${conversation_id}`,
    "max_results": 100,
    "tweet.fields": "author_id,conversation_id,created_at,entities,lang,referenced_tweets,text",
  };
  // Only send the pagination token when there is one.
  if (next_token) {
    params.next_token = next_token;
  }

  return get(SEARCH_RECENT_ENDPOINT, params);
}
68+
69+
////////////////////////////////////////////////////////////////////////////////
70+
//////////////////////////// RETRIEVAL FUNCTIONS //////////////////////////////
71+
////////////////////////////////////////////////////////////////////////////////
72+
73+
/**
 * Mirror every tweet of the conversation that `id` belongs to into the database.
 *
 * Resolves the tweet's conversation id, then walks all recent-search pages for
 * that conversation and stores each tweet with `addTweetToDatabase`.
 *
 * @param {string} id - Id of any tweet inside the conversation.
 * @returns {Promise<void>}
 * @throws {Error} When the tweet lookup yields no conversation id.
 */
async function addTweetThreadToDb(id) {
  const lookup = await getTweet(id);
  const conversation_id = lookup.body?.data?.[0]?.conversation_id;
  if (conversation_id == null) {
    throw new Error(`Could not resolve a conversation for tweet ${id}`);
  }

  let next_token;
  do {
    // Every page, including follow-up pages, is queried by conversation id.
    const page = await recentSearch(conversation_id, next_token);

    // `data` may be missing on a page; treat that as an empty page.
    for (const tweet of page.body.data ?? []) {
      await addTweetToDatabase(tweet);
    }

    next_token = page.body.meta?.next_token;

    // Only pause when another request is actually going to be made.
    if (next_token && Number(page.headers["x-rate-limit-remaining"]) <= 0) {
      // Assumes x-rate-limit-reset is a UNIX timestamp in seconds, so the
      // wait is the distance from now to that instant.
      const resetEpochSeconds = Number(page.headers["x-rate-limit-reset"]);
      const waitMs = Number.isFinite(resetEpochSeconds)
        ? Math.max(0, resetEpochSeconds * 1000 - Date.now())
        : 0;
      console.log("I'm rate limited... Pausing execution for " + Math.ceil(waitMs / 1000) + " seconds.");
      await sleep(waitMs);
    }
  } while (next_token);
}
96+
97+
/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
100+
101+
////////////////////////////////////////////////////////////////////////////////
102+
//////////////////////////// DATABASE FUNCTIONS ///////////////////////////////
103+
////////////////////////////////////////////////////////////////////////////////
104+
/**
 * Store one tweet, its author, its conversation and its references in Neo4j.
 *
 * Failures are logged and swallowed so that one bad tweet does not abort the
 * mirroring of the rest of the thread.
 *
 * @param {Object} tweet - Tweet object carrying `id`, `author_id`,
 *   `conversation_id`, `text`, `lang`, `created_at` and optionally
 *   `referenced_tweets` (entries with `id` and `type`).
 * @returns {Promise<void>}
 */
async function addTweetToDatabase(tweet) {
  // The tweet node is matched on id alone and its properties are SET afterwards.
  // Merging on the full property set would create a second node whenever a
  // placeholder `Tweet {id}` already exists from a References merge.
  const query = "MERGE (author:Person {author_id:$author_id}) MERGE (tweet:Tweet {id:$id}) MERGE (conversation:Conversation {id:$conv_id}) MERGE (author) - [:Authored] -> (tweet) MERGE (tweet) - [:In_Conversation] -> (conversation) SET tweet.text = $text, tweet.lang = $lang, tweet.created_at = $created_at";

  try {
    await session.run(query, {
      author_id: neo4j.int(tweet.author_id),
      id: neo4j.int(tweet.id),
      conv_id: neo4j.int(tweet.conversation_id),
      text: tweet.text ?? null,
      lang: tweet.lang ?? null,
      created_at: tweet.created_at ?? null,
    });
  } catch (err) {
    console.log("There was an error in addTweetToDatabase");
    console.log("I was trying to add the following tweet:");
    console.log(tweet);
    console.log(err);
  }

  // `referenced_tweets` is absent on tweets that reference nothing.
  for (const referenced of tweet.referenced_tweets ?? []) {
    try {
      await session.run(
        "MERGE (tweet:Tweet {id:$id}) MERGE (ref:Tweet {id:$rid}) MERGE (tweet) - [:References {type:$refType}] -> (ref)",
        {
          id: neo4j.int(tweet.id),
          rid: neo4j.int(referenced.id),
          refType: referenced.type,
        },
      );
    } catch (err) {
      console.log("There was an error while adding a referenced tweet");
      console.log(err);
    }
  }
}

2. add_json_to_db.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from neo4j import GraphDatabase
2+
from stanza.server import CoreNLPClient
3+
import json
4+
5+
# Conversation whose tweets get a CoreNLP parse attached.
conversationID = 1372376279882862595

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
try:
    # Context managers release the CoreNLP client and the session even when
    # annotation or a query fails part-way through.
    with CoreNLPClient(annotators=['parse'], output_format="json") as client, driver.session() as session:
        result = session.run("MATCH (n:Tweet)-[:In_Conversation]-(c:Conversation) WHERE c.id=$conv_id RETURN n.id as id, n.text as text", conv_id=conversationID)

        # Read every record up front so the writes below are not issued while
        # the read result is still being iterated on the same session.
        records = list(result)

        for record in records:
            doc = client.annotate(record["text"])
            stanford_json = json.dumps(doc)
            session.run("MATCH (tweet:Tweet) where tweet.id=$id SET tweet.stanford_json=$stanford_json", id=record['id'], stanford_json=stanford_json)
            print(stanford_json)
            print("done... \n\n")
finally:
    driver.close()

3. add_predicates_to_db.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from neo4j import GraphDatabase
2+
import subprocess
3+
import os
4+
5+
def runPropSInPython2():
    """Run run_parse_props.py on ./stanford_output.json with the Python 2.7 interpreter.

    The child process gets a copy of the current environment with PYTHONPATH
    set to the current directory. Its standard output is captured and printed
    once the process has finished.
    """
    # Raw string: "\P" is not a valid escape sequence in a normal string literal.
    python2_command = r"C:\Python27\python ./run_parse_props.py ./stanford_output.json -t --oie --corenlp-json-input"
    new_env = os.environ.copy()
    new_env.update({"PYTHONPATH": "."})
    # stdout must be piped, otherwise communicate() returns None for it.
    process = subprocess.Popen(
        python2_command,
        shell=True,
        env=new_env,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    output, _ = process.communicate()
    print(output)
12+
13+
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
try:
    with driver.session() as session:
        # To restrict the run to a single conversation, use instead:
        # conversationID = 1372376279882862595
        # result = session.run("MATCH (n:Tweet)-[:In_Conversation]-(c:Conversation) WHERE c.id=$conv_id RETURN n.id as id, n.stanford_json as stanford_json", conv_id=conversationID)
        #
        # The query below takes no parameters, so none are passed; passing the
        # commented-out conversationID would raise NameError.
        result = session.run("MATCH (n:Tweet) RETURN n.id as id, n.stanford_json as stanford_json")

        for record in result:
            stanford_json = record["stanford_json"]
            # Skip tweets that carry no parse yet; there is nothing to write for them.
            if stanford_json is None:
                continue

            # Opening in write mode truncates the file, so each tweet starts clean.
            with open("stanford_output.json", mode="w") as file:
                file.write(stanford_json)
            runPropSInPython2()
finally:
    driver.close()

License.md

Lines changed: 0 additions & 21 deletions
This file was deleted.

PIPELINE.md

Lines changed: 0 additions & 56 deletions
This file was deleted.

README.md

Lines changed: 0 additions & 42 deletions
This file was deleted.

__init__.py

Whitespace-only changes.

corenlp_server-0e0bb842d4764b1f.props

Whitespace-only changes.

corenlp_server-9bf1a8da2c314e42.props

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
annotators = parse
2+
3+
outputFormat = json
4+

handling_geoquery_question.md

Lines changed: 0 additions & 45 deletions
This file was deleted.

propStoy.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from stanza.server import CoreNLPClient
2+
import json
3+
import subprocess
4+
import os
5+
6+
7+
def runPropSInPython2():
    """Run run_parse_props.py on ./toy.json with the Python 2.7 interpreter.

    The child process gets a copy of the current environment with PYTHONPATH
    set to the current directory. Its standard output is captured and printed
    once the process has finished.
    """
    # Raw string: "\P" is not a valid escape sequence in a normal string literal.
    python2_command = r"C:\Python27\python ./run_parse_props.py ./toy.json -t --oie --corenlp-json-input"
    new_env = os.environ.copy()
    new_env.update({"PYTHONPATH": "."})
    # stdout must be piped, otherwise communicate() returns None for it.
    process = subprocess.Popen(
        python2_command,
        shell=True,
        env=new_env,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    output, _ = process.communicate()
    print(output)
14+
15+
16+
# The context manager releases the CoreNLP client when the loop ends,
# including on an error.
with CoreNLPClient(annotators=['parse'], output_format="json") as client:
    while True:
        try:
            sentence = input("Please input a sentence\n")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: leave the loop instead of dying with a traceback.
            break

        doc = client.annotate(sentence)

        # Opening in write mode truncates the file, so each sentence starts clean.
        with open("toy.json", mode="w") as file:
            file.write(json.dumps(doc))

        runPropSInPython2()

0 commit comments

Comments
 (0)