daviegourevitch
diff --git a/‎1. mirror_tweets_in_neo4j.js
Lines changed: 138 additions & 0 deletions b/‎1. mirror_tweets_in_neo4j.js
Lines changed: 138 additions & 0 deletions
diff --git a/‎2. add_json_to_db.py
Lines changed: 22 additions & 0 deletions b/‎2. add_json_to_db.py
Lines changed: 22 additions & 0 deletions
diff --git a/‎3. add_predicates_to_db.py
Lines changed: 28 additions & 0 deletions b/‎3. add_predicates_to_db.py
Lines changed: 28 additions & 0 deletions
diff --git a/‎License.md
Lines changed: 0 additions & 21 deletions b/‎License.md
Lines changed: 0 additions & 21 deletions
diff --git a/‎PIPELINE.md
Lines changed: 0 additions & 56 deletions b/‎PIPELINE.md
Lines changed: 0 additions & 56 deletions
diff --git a/‎README.md
Lines changed: 0 additions & 42 deletions b/‎README.md
Lines changed: 0 additions & 42 deletions
diff --git a/‎__init__.py b/‎__init__.py
diff --git a/‎corenlp_server-0e0bb842d4764b1f.props b/‎corenlp_server-0e0bb842d4764b1f.props
diff --git a/‎corenlp_server-9bf1a8da2c314e42.props
Lines changed: 4 additions & 0 deletions b/‎corenlp_server-9bf1a8da2c314e42.props
Lines changed: 4 additions & 0 deletions
diff --git a/‎handling_geoquery_question.md
Lines changed: 0 additions & 45 deletions b/‎handling_geoquery_question.md
Lines changed: 0 additions & 45 deletions
diff --git a/‎propStoy.py
Lines changed: 27 additions & 0 deletions b/‎propStoy.py
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,138 @@
+import * as neo4j from 'neo4j-driver'
+import needle from 'needle';
+import credentials from './credentials/Twitter.js';
+
+// Globals
+const TOKEN = credentials.auth_tokens.BEARER_TOKEN;
+const GET_ENDPOINT = "https://api.twitter.com/2/tweets";
+const SEARCH_RECENT_ENDPOINT = "https://api.twitter.com/2/tweets/search/recent";
+
+// https://twitter.com/briantylercohen/status/1369403905956847618 <- use this!!!!
+// ^ try this one
+
+////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////  MAIN  //////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+// Startup database
+const driver = neo4j.driver('bolt://localhost:7687', neo4j.auth.basic('neo4j', "password"))
+const session = driver.session()
+
+
+const rootTweetId = "1372316893739163652";
+const rootTweet = await addTweetThreadToDb(rootTweetId);
+
+// Close database
+await driver.close()
+
+////////////////////////////////////////////////////////////////////////////////
+//////////////////////////// ENDPOINT FUNCTIONS  ///////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+async function get(endpointURL, params) {
+    const res = await needle('get', endpointURL, params, { headers: {
+        "authorization": `Bearer ${TOKEN}`
+    }})
+
+    if (res.body) {
+        const ret = {
+          body: res.body,
+          headers: res.headers
+        }
+        return ret;
+    } else {
+        throw new Error('Unsuccessful request')
+    }
+}
+
+async function getTweet(id) {
+  if (id == null) throw new Error ("ID not specified in getTweets");
+  const params = {
+    "ids": id,
+    "tweet.fields": "author_id,conversation_id,created_at,entities,lang,referenced_tweets,text",
+  }
+  return await get(GET_ENDPOINT, params);
+}
+
+async function recentSearch(conversation_id, next_token) {
+  if (conversation_id == null) throw new Error ("ID not specified in recentSearch");
+  const params = {
+    "query":`conversation_id:${conversation_id}`,
+    "max_results": 100,
+    "tweet.fields": "author_id,conversation_id,created_at,entities,lang,referenced_tweets,text",
+  }
+  if(next_token) {params.next_token = next_token}
+
+  return await get(SEARCH_RECENT_ENDPOINT, params);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//////////////////////////// RETRIEVAL FUNCTIONS  //////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+async function addTweetThreadToDb(id) {
+  // Get first page
+  const conversation_id = (await getTweet(id)).body.data[0].conversation_id;
+  var page = await recentSearch(conversation_id);
+
+  for(const tweet in page.body.data) {
+    await addTweetToDatabase(page.body.data[tweet]);
+  }
+
+  // If more pages...
+  while(page.body.meta.next_token) {
+    var page = await recentSearch(id, page.body.meta.next_token);
+
+    for(const tweet in page.body.data) {
+      await addTweetToDatabase(page.body.data[tweet]);
+    }
+
+    if(page.headers["x-rate-limit-remaining"] <= 0) {
+      console.log("I'm rate limited... Pausing execution for " + page.headers["x-rate-limit-reset"] + " seconds.")
+      await sleep(page.headers["x-rate-limit-remaining"]*1000)
+    }
+  }
+}
+
+function sleep(ms) {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//////////////////////////// DATABASE FUNCTIONS  ///////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+async function addTweetToDatabase(tweet) {
+  var query = "MERGE (author:Person {author_id:$author_id}) MERGE (tweet:Tweet {id:$id, text:$text, lang:$lang, created_at:$created_at}) MERGE (conversation:Conversation {id:$conv_id}) MERGE (author) - [:Authored] -> (tweet) MERGE (tweet) - [:In_Conversation] -> (conversation)";
+
+  try {
+    const result = await session
+      .run(query, {
+        author_id: neo4j.int(tweet.author_id),
+        id: neo4j.int(tweet.id),
+        text: tweet.text,
+        conv_id: neo4j.int(tweet.conversation_id),
+        lang: tweet.lang,
+        created_at: tweet.created_at,
+        text: tweet.text,
+      });
+  } catch (err) {
+    console.log("There was an error in addTweetToDatabase")
+    console.log("I was trying to add the following tweet:")
+    console.log(tweet)
+    console.log(err)
+  }
+
+  for(const referenced in tweet.referenced_tweets) {
+    try {
+      const result = await session.run("MERGE (tweet:Tweet {id:$id}) MERGE (ref:Tweet {id:$rid}) MERGE (tweet) - [:References {type:$refType}] -> (ref)", {
+        id: neo4j.int(tweet.id),
+        rid: neo4j.int(tweet.referenced_tweets[referenced].id),
+        refType: tweet.referenced_tweets[referenced].type
+      }
+    )
+    } catch(err) {
+      console.log("There was an error while adding a referenced tweet")
+      console.log(err)
+    }
+  }
+}
@@ -0,0 +1,22 @@
+from neo4j import GraphDatabase
+from stanza.server import CoreNLPClient
+import json
+
+conversationID = 1372376279882862595
+
+
+client = CoreNLPClient(annotators=['parse'], output_format="json")
+
+driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
+session = driver.session()
+
+result = session.run("MATCH (n:Tweet)-[:In_Conversation]-(c:Conversation) WHERE c.id=$conv_id RETURN n.id as id, n.text as text", conv_id=conversationID)
+
+for record in result:
+    doc = client.annotate(record["text"])
+    session.run("MATCH (tweet:Tweet) where tweet.id=$id SET tweet.stanford_json=$stanford_json", id=record['id'], stanford_json=json.dumps(doc))
+    print(json.dumps(doc))
+    print("done... \n\n")
+
+
+driver.close()
@@ -0,0 +1,28 @@
+from neo4j import GraphDatabase
+import subprocess
+import os
+
+def runPropSInPython2():
+    python3_command = "C:\Python27\python ./run_parse_props.py ./stanford_output.json -t --oie --corenlp-json-input"
+    new_env = os.environ.copy()
+    new_env.update({"PYTHONPATH": "."})
+    process = subprocess.Popen(python3_command, shell=True, env=new_env)
+    output, error = process.communicate()
+    print(output)
+
+driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
+session = driver.session()
+
+# conversationID = 1372376279882862595
+# result = session.run("MATCH (n:Tweet)-[:In_Conversation]-(c:Conversation) WHERE c.id=$conv_id RETURN n.id as id, n.stanford_json as stanford_json", conv_id=conversationID)
+result = session.run("MATCH (n:Tweet) RETURN n.id as id, n.stanford_json as stanford_json", conv_id=conversationID)
+
+for record in result:
+    open("stanford_output.json", mode="w+").close()
+    file = open("stanford_output.json", mode="w+")
+    file.write(record["stanford_json"])
+    file.close()
+    runPropSInPython2()
+
+
+driver.close()
@@ -0,0 +1,4 @@
+annotators = parse
+
+outputFormat = json
+
@@ -0,0 +1,27 @@
+from stanza.server import CoreNLPClient
+import json
+import subprocess
+import os
+
+
+def runPropSInPython2():
+    python3_command = "C:\Python27\python ./run_parse_props.py ./toy.json -t --oie --corenlp-json-input"
+    new_env = os.environ.copy()
+    new_env.update({"PYTHONPATH": "."})
+    process = subprocess.Popen(python3_command, shell=True, env=new_env)
+    output, error = process.communicate()
+    print(output)
+
+
+client = CoreNLPClient(annotators=['parse'], output_format="json")
+
+while True:
+    sentence = input("Please input a sentence\n")
+    doc = client.annotate(sentence)
+
+    open("toy.json", mode="w+").close()
+    file = open("toy.json", mode="w+")
+    file.write(json.dumps(doc))
+    file.close()
+
+    runPropSInPython2()
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +annotators = parse
++
 +outputFormat = json
++