|
|
Basic MongoDB queries on the tweets collection:
|
|
|
|
|
|
use twitter
|
|
|
//find all (first 50)
|
|
|
|
|
|
find all (first 50)
|
|
|
|
|
|
db.tweets.find()
|
|
|
|
|
|
//count all
|
|
|
count all
|
|
|
|
|
|
db.tweets.find().count()
|
|
|
|
|
|
//find those satisfying a condition (date, language) and count them
|
|
|
find those satisfying a condition (date, language) and count them
|
|
|
|
|
|
db.tweets.find({"created_at" : {$gte : ISODate("2014-05-01T00:00:00.000Z") }})
|
|
|
db.tweets.find({"created_at" : {$gte : ISODate("2014-05-05T00:00:00.000Z"), $lt: ISODate("2014-05-06T00:00:00.000Z")}}).count()
|
|
|
db.tweets.find({ lang : "en"}).count()
|
|
|
db.tweets.find({"created_at" : {$gte : ISODate("2014-05-05T00:00:00.000Z"), $lt: ISODate("2014-05-06T00:00:00.000Z")}, lang : "en"}).count()
|
|
|
|
|
|
//(ascending) index on field "created_at" to improve performance
|
|
|
(ascending) index on field "created_at" to improve performance
|
|
|
|
|
|
db.tweets.ensureIndex( { created_at: 1 } )
|
|
|
|
|
|
//fulltext index on the field "text"
|
|
|
fulltext index on the field "text"
|
|
|
|
|
|
db.tweets.ensureIndex( { text: "text" } )
|
|
|
|
|
|
//fulltext search
|
|
|
fulltext search
|
|
|
|
|
|
db.tweets.runCommand( "text", {search: "usd"} )
|
|
|
db.tweets.find({ $text:{$search: "usd"}})
|
|
|
|
|
|
//the aggregation framework for pipelines, count number of tweets per day
|
|
|
the aggregation framework for pipelines, count number of tweets per day
|
|
|
|
|
|
db.tweets.aggregate(
|
|
|
[
|
|
|
{ $project: { day : {y: {$year:"$created_at"}, m : {$month:"$created_at"}, d:{$dayOfMonth:"$created_at" }}}},
|
... | ... | @@ -41,7 +49,8 @@ Basic MongoDB queries on the tweets collection: |
|
|
)
|
|
|
|
|
|
|
|
|
//count the number of tweets matching a fulltext query per day
|
|
|
count the number of tweets matching a fulltext query per day
|
|
|
|
|
|
db.tweets.aggregate(
|
|
|
[
|
|
|
{ $match : { $text: { $search: "query" } } },
|
... | ... | @@ -51,26 +60,26 @@ Basic MongoDB queries on the tweets collection: |
|
|
]
|
|
|
)
|
|
|
|
|
|
Using map-reduce
|
|
|
|
|
|
db.tweets.mapReduce(
|
|
|
function(){emit(this.created_at.toDateString(), 1)},
|
|
|
function(key, values){return Array.sum(values)},
|
|
|
{
|
|
|
out: "counted"
|
|
|
}
|
|
|
)
|
|
|
|
|
|
db.tweets.mapReduce(
|
|
|
function(){emit(this.created_at.toDateString(), 1)},
|
|
|
function(key, values){return Array.sum(values)},
|
|
|
{
|
|
|
out: "counted"
|
|
|
}
|
|
|
)
|
|
|
|
|
|
db.tweets.group(
|
|
|
{
|
|
|
keyf: function(doc)
|
|
|
{
|
|
|
return {'date': doc.created_at.toDateString()}
|
|
|
},
|
|
|
reduce: function (date, counter)
|
|
|
{
|
|
|
counter.cnt++
|
|
|
},
|
|
|
initial: {cnt:0},
|
|
|
}
|
|
|
) |
|
|
\ No newline at end of file |
|
|
db.tweets.group(
|
|
|
{
|
|
|
keyf: function(doc)
|
|
|
{
|
|
|
return {'date': doc.created_at.toDateString()}
|
|
|
},
|
|
|
reduce: function (date, counter)
|
|
|
{
|
|
|
counter.cnt++
|
|
|
},
|
|
|
initial: {cnt:0},
|
|
|
}
|
|
|
) |
|
|
\ No newline at end of file |