|
|
Basic MongoDB queries on the tweets collection:
|
|
|
|
|
|
// Select the "twitter" database; the db.* calls that follow run against it.
use twitter
|
|
|
use twitter
|
|
|
//find all (first 50)
|
|
|
db.tweets.find()
|
|
|
db.tweets.find()
|
|
|
|
|
|
//count all
|
|
|
db.tweets.find().count()
|
|
|
db.tweets.find().count()
|
|
|
|
|
|
//find those satisfying a condition (date, language) and count them
|
|
|
db.tweets.find({"created_at" : {$gte : ISODate("2014-05-01T00:00:00:000Z") }})
|
|
|
db.tweets.find({"created_at" : {$gte : ISODate("2014-05-05T00:00:00:000Z"), $lt: ISODate("2014-05-06T00:00:00:000Z")}}).count()
|
|
|
db.tweets.find( lang : "en"}).count()
|
|
|
db.tweets.find({"created_at" : {$gte : ISODate("2014-05-05T00:00:00:000Z"), $lt: ISODate("2014-05-06T00:00:00:000Z")}, lang : "en"}).count()
|
|
|
db.tweets.find({"created_at" : {$gte : ISODate("2014-05-01T00:00:00:000Z") }})
|
|
|
db.tweets.find({"created_at" : {$gte : ISODate("2014-05-05T00:00:00:000Z"), $lt: ISODate("2014-05-06T00:00:00:000Z")}}).count()
|
|
|
db.tweets.find( lang : "en"}).count()
|
|
|
db.tweets.find({"created_at" : {$gte : ISODate("2014-05-05T00:00:00:000Z"), $lt: ISODate("2014-05-06T00:00:00:000Z")}, lang : "en"}).count()
|
|
|
|
|
|
//(ascending) index on field "created_at" to improve performance
|
|
|
db.twitter.ensureIndex( { created_at: 1 } )
|
|
|
db.twitter.ensureIndex( { created_at: 1 } )
|
|
|
|
|
|
//fulltext index on the field "text"
|
|
|
db.tweets.ensureIndex( { text: "text" } )
|
|
|
db.tweets.ensureIndex( { text: "text" } )
|
|
|
|
|
|
//fulltext search
|
|
|
db.tweets.runCommand( "text", {search: "usd"} )
|
|
|
db.tweets.find({ $text:{$search: "usd"}})
|
|
|
db.tweets.runCommand( "text", {search: "usd"} )
|
|
|
db.tweets.find({ $text:{$search: "usd"}})
|
|
|
|
|
|
//the aggregation framework for pipelines, count number of tweets per day
|
|
|
// Tweets per day, keyed by year/month/day parts of created_at.
// The pipeline array is passed to a single aggregate() call.
db.tweets.aggregate(
    [
        // Reduce each tweet to a "day" sub-document {y, m, d} derived from created_at.
        { $project: { day : {y: {$year:"$created_at"}, m : {$month:"$created_at"}, d:{$dayOfMonth:"$created_at" }}}},
        // One group per distinct day value; "number" adds 1 for every tweet in the group.
        { $group : { _id : "$day", number : { $sum : 1 }}},
        // Return the groups ordered by their day key, ascending.
        { $sort : { _id : 1 }}
    ]
)
|
|
|
|
|
|
// Tweets per day, keyed by the first 10 characters of created_at.
// The pipeline array is passed to a single aggregate() call.
db.tweets.aggregate(
    [
        // Keep only "day": the substring of created_at starting at offset 0, length 10.
        // NOTE(review): presumably this yields "YYYY-MM-DD" for a date value - confirm on the target server version.
        { $project : { day : {$substr: ["$created_at",0,10]}}},
        // One group per distinct day string; "number" adds 1 for every tweet in the group.
        { $group : { _id : "$day", number : { $sum : 1 }}},
        // Return the groups ordered by their day key, ascending.
        { $sort : { _id : 1 }}
    ]
)
|
|
|
|
|
|
|
|
|
//count the number of tweets matching a fulltext query per day
|
|
|
// Tweets matching a fulltext search, counted per day.
// The pipeline array is passed to a single aggregate() call.
db.tweets.aggregate(
    [
        // Keep only tweets matching the $text search; "query" is the placeholder search string.
        { $match : { $text: { $search: "query" } } },
        // Keep only "day": the substring of created_at starting at offset 0, length 10.
        // NOTE(review): presumably this yields "YYYY-MM-DD" for a date value - confirm on the target server version.
        { $project : { day : {$substr: ["$created_at",0,10]}}},
        // One group per distinct day string; "number" adds 1 for every matching tweet.
        { $group : { _id : "$day", number : { $sum : 1 }}},
        // Return the groups ordered by their day key, ascending.
        { $sort : { _id : 1 }}
    ]
)
|
|
|
|
|
|
|
|
|
|
... | ... | |