use the pending list instead of scraping individual beatmap pages

This commit is contained in:
Michael Zhang 2020-10-12 07:23:32 -05:00
parent e6e9ff9fa3
commit e84affb5e2
Signed by: michael
GPG key ID: BDA47A31A3C8EE6B
5 changed files with 173 additions and 130 deletions

46
bot.go
View file

@ -74,17 +74,16 @@ func (bot *Bot) errWrap(fn interface{}) interface{} {
return newFunc.Interface() return newFunc.Interface()
} }
func (bot *Bot) NotifyNewEvent(channels []string, newMaps []Event) (err error) { func (bot *Bot) NotifyNewBeatmap(channels []string, newMaps []Beatmapset) (err error) {
for i, event := range newMaps { for i, beatmapSet := range newMaps {
var eventTime time.Time var eventTime time.Time
eventTime, err = time.Parse(time.RFC3339, event.CreatedAt) eventTime, err = time.Parse(time.RFC3339, beatmapSet.LastUpdated)
if err != nil { if err != nil {
return return
} }
log.Println(i, "event time", eventTime) log.Println(i, "event time", eventTime)
var ( var (
gotBeatmapInfo = false
beatmapSet Beatmapset beatmapSet Beatmapset
gotDownloadedBeatmap = false gotDownloadedBeatmap = false
downloadedBeatmap BeatmapsetDownloaded downloadedBeatmap BeatmapsetDownloaded
@ -96,11 +95,7 @@ func (bot *Bot) NotifyNewEvent(channels []string, newMaps []Event) (err error) {
foundPatch = false foundPatch = false
// commitFiles *object.FileIter // commitFiles *object.FileIter
) )
beatmapSet, err = bot.getBeatmapsetInfo(event) // beatmapSet, err = bot.getBeatmapsetInfo(beatmap)
if err != nil {
log.Println("failed to retrieve beatmap info:", err)
} else {
gotBeatmapInfo = true
// try to open a repo for this beatmap // try to open a repo for this beatmap
var repo *git.Repository var repo *git.Repository
@ -150,7 +145,7 @@ func (bot *Bot) NotifyNewEvent(channels []string, newMaps []Event) (err error) {
worktree.Add(f.Name()) worktree.Add(f.Name())
} }
hash, err = worktree.Commit( hash, err = worktree.Commit(
fmt.Sprintf("evtID: %d", event.ID), fmt.Sprintf("update: %d", beatmapSet.ID),
&git.CommitOptions{ &git.CommitOptions{
Author: &object.Signature{ Author: &object.Signature{
Name: beatmapSet.Creator, Name: beatmapSet.Creator,
@ -180,20 +175,12 @@ func (bot *Bot) NotifyNewEvent(channels []string, newMaps []Event) (err error) {
foundPatch = true foundPatch = true
} }
// report diffs
}
log.Println("BEATMAP SET", beatmapSet) log.Println("BEATMAP SET", beatmapSet)
embed := &discordgo.MessageEmbed{ embed := &discordgo.MessageEmbed{
URL: "https://osu.ppy.sh" + event.Beatmapset.URL, URL: fmt.Sprintf("https://osu.ppy.sh/s/%d", beatmapSet.ID),
Title: event.Type + ": " + event.Beatmapset.Title, Title: fmt.Sprintf("Update: %s - %s", beatmapSet.Artist, beatmapSet.Title),
Timestamp: event.CreatedAt, Timestamp: eventTime.String(),
Footer: &discordgo.MessageEmbedFooter{ Author: &discordgo.MessageEmbedAuthor{
Text: fmt.Sprintf("Event ID: %d", event.ID),
},
}
if gotBeatmapInfo {
embed.Author = &discordgo.MessageEmbedAuthor{
URL: "https://osu.ppy.sh/u/" + strconv.Itoa(beatmapSet.UserId), URL: "https://osu.ppy.sh/u/" + strconv.Itoa(beatmapSet.UserId),
Name: beatmapSet.Creator, Name: beatmapSet.Creator,
IconURL: fmt.Sprintf( IconURL: fmt.Sprintf(
@ -201,9 +188,10 @@ func (bot *Bot) NotifyNewEvent(channels []string, newMaps []Event) (err error) {
beatmapSet.UserId, beatmapSet.UserId,
time.Now().Unix(), time.Now().Unix(),
), ),
} },
embed.Thumbnail = &discordgo.MessageEmbedThumbnail{ Thumbnail: &discordgo.MessageEmbedThumbnail{
URL: beatmapSet.Covers.SlimCover2x, URL: beatmapSet.Covers.SlimCover2x,
},
} }
if gotDownloadedBeatmap { if gotDownloadedBeatmap {
@ -212,7 +200,7 @@ func (bot *Bot) NotifyNewEvent(channels []string, newMaps []Event) (err error) {
embed.Description = patch.Stats().String() embed.Description = patch.Stats().String()
} }
} }
}
for _, channelId := range channels { for _, channelId := range channels {
bot.ChannelMessageSendEmbed(channelId, embed) bot.ChannelMessageSendEmbed(channelId, embed)
} }
@ -301,10 +289,10 @@ func (bot *Bot) newMessageHandler(s *discordgo.Session, m *discordgo.MessageCrea
return return
} }
go func() { // go func() {
time.Sleep(refreshInterval) // time.Sleep(refreshInterval)
bot.requests <- mapperId // bot.requests <- mapperId
}() // }()
bot.ChannelMessageSend(m.ChannelID, fmt.Sprintf("subscribed to %+v", mapper)) bot.ChannelMessageSend(m.ChannelID, fmt.Sprintf("subscribed to %+v", mapper))
} }

View file

@ -24,6 +24,7 @@ func main() {
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
log.Println("opened db")
bot, err := NewBot(&config, db, requests) bot, err := NewBot(&config, db, requests)
if err != nil { if err != nil {
@ -59,5 +60,6 @@ func main() {
db.Close() db.Close()
bot.Close() bot.Close()
ticker.Stop()
os.Exit(code) os.Exit(code)
} }

View file

@ -18,6 +18,7 @@ type Beatmapset struct {
Covers BeatmapCovers `json:"covers"` Covers BeatmapCovers `json:"covers"`
Beatmaps []Beatmap `json:"beatmaps,omitempty"` Beatmaps []Beatmap `json:"beatmaps,omitempty"`
LastUpdated string `json:"last_updated,omitempty"`
} }
type Beatmap struct { type Beatmap struct {
@ -65,3 +66,7 @@ type EventUser struct {
URL string `json:"url"` URL string `json:"url"`
PreviousUsername string `json:"previousUsername,omitempty"` PreviousUsername string `json:"previousUsername,omitempty"`
} }
// BeatmapSearch wraps the list of beatmapsets stored under the "beatmapsets"
// key of a JSON document. It is the value SearchBeatmaps hands to api.Request.
// NOTE(review): presumably this mirrors the body returned by the beatmapset
// search endpoint; confirm against the API response.
type BeatmapSearch struct {
Beatmapsets []Beatmapset `json:"beatmapsets"`
}

View file

@ -8,6 +8,7 @@ import (
"io/ioutil" "io/ioutil"
"log" "log"
"net/http" "net/http"
"net/url"
"os" "os"
"strings" "strings"
"time" "time"
@ -28,7 +29,7 @@ type Osuapi struct {
func NewOsuapi(config *Config) *Osuapi { func NewOsuapi(config *Config) *Osuapi {
client := &http.Client{ client := &http.Client{
Timeout: 10 * time.Second, Timeout: 9 * time.Second,
} }
// want to cap at around 1000 requests a minute, OSU cap is 1200 // want to cap at around 1000 requests a minute, OSU cap is 1200
@ -209,6 +210,19 @@ func (api *Osuapi) GetUserEvents(userId int, limit int, offset int) (events []Ev
return return
} }
// SearchBeatmaps issues a GET to "/beatmapsets/search" with rankStatus sent as
// the "s" query parameter, passing &beatmapSearch to api.Request as the
// destination (presumably filled from the response body; see Request).
//
// Whatever error api.Request reports is returned unchanged alongside the
// current contents of beatmapSearch.
func (api *Osuapi) SearchBeatmaps(rankStatus string) (beatmapSearch BeatmapSearch, err error) {
	// url.Values takes care of escaping the status string.
	params := url.Values{"s": []string{rankStatus}}
	endpoint := "/beatmapsets/search?" + params.Encode()

	err = api.Request("GET", endpoint, &beatmapSearch)
	return beatmapSearch, err
}
type OsuToken struct { type OsuToken struct {
TokenType string `json:"token_type"` TokenType string `json:"token_type"`
ExpiresIn int `json:"expires_in"` ExpiresIn int `json:"expires_in"`

View file

@ -7,40 +7,74 @@ import (
) )
var ( var (
refreshInterval = 60 * time.Second refreshInterval = 30 * time.Second
ticker = time.NewTicker(refreshInterval)
) )
func RunScraper(bot *Bot, db *Db, api *Osuapi, requests chan int) { func RunScraper(bot *Bot, db *Db, api *Osuapi, requests chan int) {
// start timers lastUpdateTime := time.Now()
go startTimers(db, requests) go func() {
for range ticker.C {
for userId := range requests { // build a list of currently tracked mappers
log.Println("scraping user", userId) trackedMappers := make(map[int]int)
newMaps, err := getNewMaps(db, api, userId) db.IterTrackedMappers(func(userId int) error {
if err != nil { trackedMappers[userId] = 1
log.Println("err getting new maps:", err)
}
log.Println("new maps for", userId, newMaps)
if len(newMaps) > 0 {
channels := make([]string, 0)
db.IterTrackingChannels(userId, func(channelId string) error {
channels = append(channels, channelId)
return nil return nil
}) })
err := bot.NotifyNewEvent(channels, newMaps) // TODO: is this sorted for sure??
pendingSets, err := bot.api.SearchBeatmaps("pending")
if err != nil {
log.Println("error fetching pending sets", err)
}
allNewMaps := make(map[int][]Beatmapset, 0)
var newLastUpdateTime = time.Unix(0, 0)
for _, beatmap := range pendingSets.Beatmapsets {
updatedTime, err := time.Parse(time.RFC3339, beatmap.LastUpdated)
if err != nil {
log.Println("error parsing last updated time", updatedTime)
}
if updatedTime.After(newLastUpdateTime) {
// update lastUpdateTime to latest updated map
newLastUpdateTime = updatedTime
}
if !updatedTime.After(lastUpdateTime) {
break
}
if mapperId, ok := trackedMappers[beatmap.UserId]; ok {
if _, ok2 := allNewMaps[mapperId]; !ok2 {
allNewMaps[mapperId] = make([]Beatmapset, 0)
}
allNewMaps[mapperId] = append(allNewMaps[mapperId], beatmap)
}
}
if len(allNewMaps) > 0 {
log.Println("all new maps", allNewMaps)
for mapperId, newMaps := range allNewMaps {
channels := make([]string, 0)
db.IterTrackingChannels(mapperId, func(channelId string) error {
channels = append(channels, channelId)
return nil
})
log.Println(newMaps)
err := bot.NotifyNewBeatmap(channels, newMaps)
if err != nil { if err != nil {
log.Println("error notifying new maps", err) log.Println("error notifying new maps", err)
} }
} }
// wait a minute and put them back into the queue
go func(id int) {
time.Sleep(refreshInterval)
requests <- id
}(userId)
} }
lastUpdateTime = newLastUpdateTime
log.Println("last updated time", lastUpdateTime)
}
}()
} }
func getNewMaps(db *Db, api *Osuapi, userId int) (newMaps []Event, err error) { func getNewMaps(db *Db, api *Osuapi, userId int) (newMaps []Event, err error) {