use the pending list instead of scraping individual beatmap pages

This commit is contained in:
Michael Zhang 2020-10-12 07:23:32 -05:00
parent e6e9ff9fa3
commit e84affb5e2
Signed by: michael
GPG key ID: BDA47A31A3C8EE6B
5 changed files with 173 additions and 130 deletions

192
bot.go
View file

@ -74,17 +74,16 @@ func (bot *Bot) errWrap(fn interface{}) interface{} {
return newFunc.Interface()
}
func (bot *Bot) NotifyNewEvent(channels []string, newMaps []Event) (err error) {
for i, event := range newMaps {
func (bot *Bot) NotifyNewBeatmap(channels []string, newMaps []Beatmapset) (err error) {
for i, beatmapSet := range newMaps {
var eventTime time.Time
eventTime, err = time.Parse(time.RFC3339, event.CreatedAt)
eventTime, err = time.Parse(time.RFC3339, beatmapSet.LastUpdated)
if err != nil {
return
}
log.Println(i, "event time", eventTime)
var (
gotBeatmapInfo = false
beatmapSet Beatmapset
gotDownloadedBeatmap = false
downloadedBeatmap BeatmapsetDownloaded
@ -96,104 +95,92 @@ func (bot *Bot) NotifyNewEvent(channels []string, newMaps []Event) (err error) {
foundPatch = false
// commitFiles *object.FileIter
)
beatmapSet, err = bot.getBeatmapsetInfo(event)
// beatmapSet, err = bot.getBeatmapsetInfo(beatmap)
// try to open a repo for this beatmap
var repo *git.Repository
repoDir := path.Join(bot.config.Repos, strconv.Itoa(beatmapSet.ID))
if _, err := os.Stat(repoDir); os.IsNotExist(err) {
os.MkdirAll(repoDir, 0777)
}
repo, err = git.PlainOpen(repoDir)
if err == git.ErrRepositoryNotExists {
// create a new repo
repo, err = git.PlainInit(repoDir, false)
}
if err != nil {
log.Println("failed to retrieve beatmap info:", err)
return
}
// download latest updates to the map
err = bot.downloadBeatmapTo(&beatmapSet, repo, repoDir)
if err != nil {
log.Println("failed to download beatmap:", err)
} else {
gotBeatmapInfo = true
gotDownloadedBeatmap = true
}
// try to open a repo for this beatmap
var repo *git.Repository
repoDir := path.Join(bot.config.Repos, strconv.Itoa(beatmapSet.ID))
if _, err := os.Stat(repoDir); os.IsNotExist(err) {
os.MkdirAll(repoDir, 0777)
// create a commit
var (
worktree *git.Worktree
files []os.FileInfo
hash plumbing.Hash
)
worktree, err = repo.Worktree()
if err != nil {
return
}
// status, err = worktree.Status()
// if err != nil {
// return
// }
files, err = ioutil.ReadDir(repoDir)
if err != nil {
return
}
for _, f := range files {
if f.Name() == ".git" {
continue
}
repo, err = git.PlainOpen(repoDir)
if err == git.ErrRepositoryNotExists {
// create a new repo
repo, err = git.PlainInit(repoDir, false)
}
if err != nil {
return
}
// download latest updates to the map
err = bot.downloadBeatmapTo(&beatmapSet, repo, repoDir)
if err != nil {
log.Println("failed to download beatmap:", err)
} else {
gotDownloadedBeatmap = true
}
// create a commit
var (
worktree *git.Worktree
files []os.FileInfo
hash plumbing.Hash
)
worktree, err = repo.Worktree()
if err != nil {
return
}
// status, err = worktree.Status()
// if err != nil {
// return
// }
files, err = ioutil.ReadDir(repoDir)
if err != nil {
return
}
for _, f := range files {
if f.Name() == ".git" {
continue
}
worktree.Add(f.Name())
}
hash, err = worktree.Commit(
fmt.Sprintf("evtID: %d", event.ID),
&git.CommitOptions{
Author: &object.Signature{
Name: beatmapSet.Creator,
Email: "nobody@localhost",
When: eventTime,
},
worktree.Add(f.Name())
}
hash, err = worktree.Commit(
fmt.Sprintf("update: %d", beatmapSet.ID),
&git.CommitOptions{
Author: &object.Signature{
Name: beatmapSet.Creator,
Email: "nobody@localhost",
When: eventTime,
},
)
},
)
if err != nil {
return
}
commit, err = repo.CommitObject(hash)
if err != nil {
return
}
parent, err = commit.Parent(0)
if err == object.ErrParentNotFound {
} else if err != nil {
return
} else {
patch, err = commit.Patch(parent)
if err != nil {
return
}
commit, err = repo.CommitObject(hash)
if err != nil {
return
}
parent, err = commit.Parent(0)
if err == object.ErrParentNotFound {
} else if err != nil {
return
} else {
patch, err = commit.Patch(parent)
if err != nil {
return
}
foundPatch = true
}
// report diffs
foundPatch = true
}
log.Println("BEATMAP SET", beatmapSet)
embed := &discordgo.MessageEmbed{
URL: "https://osu.ppy.sh" + event.Beatmapset.URL,
Title: event.Type + ": " + event.Beatmapset.Title,
Timestamp: event.CreatedAt,
Footer: &discordgo.MessageEmbedFooter{
Text: fmt.Sprintf("Event ID: %d", event.ID),
},
}
if gotBeatmapInfo {
embed.Author = &discordgo.MessageEmbedAuthor{
URL: fmt.Sprintf("https://osu.ppy.sh/s/%d", beatmapSet.ID),
Title: fmt.Sprintf("Update: %s - %s", beatmapSet.Artist, beatmapSet.Title),
Timestamp: eventTime.String(),
Author: &discordgo.MessageEmbedAuthor{
URL: "https://osu.ppy.sh/u/" + strconv.Itoa(beatmapSet.UserId),
Name: beatmapSet.Creator,
IconURL: fmt.Sprintf(
@ -201,18 +188,19 @@ func (bot *Bot) NotifyNewEvent(channels []string, newMaps []Event) (err error) {
beatmapSet.UserId,
time.Now().Unix(),
),
}
embed.Thumbnail = &discordgo.MessageEmbedThumbnail{
},
Thumbnail: &discordgo.MessageEmbedThumbnail{
URL: beatmapSet.Covers.SlimCover2x,
}
},
}
if gotDownloadedBeatmap {
log.Println(downloadedBeatmap)
if foundPatch {
embed.Description = patch.Stats().String()
}
if gotDownloadedBeatmap {
log.Println(downloadedBeatmap)
if foundPatch {
embed.Description = patch.Stats().String()
}
}
for _, channelId := range channels {
bot.ChannelMessageSendEmbed(channelId, embed)
}
@ -301,10 +289,10 @@ func (bot *Bot) newMessageHandler(s *discordgo.Session, m *discordgo.MessageCrea
return
}
go func() {
time.Sleep(refreshInterval)
bot.requests <- mapperId
}()
// go func() {
// time.Sleep(refreshInterval)
// bot.requests <- mapperId
// }()
bot.ChannelMessageSend(m.ChannelID, fmt.Sprintf("subscribed to %+v", mapper))
}

View file

@ -24,6 +24,7 @@ func main() {
if err != nil {
log.Fatal(err)
}
log.Println("opened db")
bot, err := NewBot(&config, db, requests)
if err != nil {
@ -59,5 +60,6 @@ func main() {
db.Close()
bot.Close()
ticker.Stop()
os.Exit(code)
}

View file

@ -16,8 +16,9 @@ type Beatmapset struct {
Creator string `json:"creator"`
UserId int `json:"user_id"`
Covers BeatmapCovers `json:"covers"`
Beatmaps []Beatmap `json:"beatmaps,omitempty"`
Covers BeatmapCovers `json:"covers"`
Beatmaps []Beatmap `json:"beatmaps,omitempty"`
LastUpdated string `json:"last_updated,omitempty"`
}
type Beatmap struct {
@ -65,3 +66,7 @@ type EventUser struct {
URL string `json:"url"`
PreviousUsername string `json:"previousUsername,omitempty"`
}
// BeatmapSearch is the decode target for a beatmapset search response (it is
// what SearchBeatmaps fills in). Only the "beatmapsets" array is captured.
type BeatmapSearch struct {
// Beatmapsets holds the entries of the response's "beatmapsets" field.
Beatmapsets []Beatmapset `json:"beatmapsets"`
}

View file

@ -8,6 +8,7 @@ import (
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"strings"
"time"
@ -28,7 +29,7 @@ type Osuapi struct {
func NewOsuapi(config *Config) *Osuapi {
client := &http.Client{
Timeout: 10 * time.Second,
Timeout: 9 * time.Second,
}
// want to cap at around 1000 requests a minute, OSU cap is 1200
@ -209,6 +210,19 @@ func (api *Osuapi) GetUserEvents(userId int, limit int, offset int) (events []Ev
return
}
// SearchBeatmaps issues a GET against the "/beatmapsets/search" endpoint,
// passing rankStatus as the "s" query parameter, and decodes the response
// into a BeatmapSearch.
//
// Any error reported by api.Request is returned unchanged, together with
// whatever Request left in beatmapSearch.
func (api *Osuapi) SearchBeatmaps(rankStatus string) (beatmapSearch BeatmapSearch, err error) {
	// Build the query through url.Values so rankStatus is escaped properly.
	params := url.Values{"s": []string{rankStatus}}
	endpoint := "/beatmapsets/search?" + params.Encode()

	err = api.Request("GET", endpoint, &beatmapSearch)
	return
}
type OsuToken struct {
TokenType string `json:"token_type"`
ExpiresIn int `json:"expires_in"`

View file

@ -7,40 +7,74 @@ import (
)
// Package-level polling state for the scraper.
var (
// refreshInterval is the period handed to ticker below.
// NOTE(review): this diff rendering shows both sides of the change (60s and
// 30s); presumably only the 30s line remains after the commit — confirm.
refreshInterval = 60 * time.Second
refreshInterval = 30 * time.Second
// ticker fires every refreshInterval; RunScraper ranges over ticker.C and
// main calls ticker.Stop() on shutdown.
ticker = time.NewTicker(refreshInterval)
)
// RunScraper drives the periodic check for new beatmaps and forwards them to
// the bot for notification.
//
// NOTE(review): this span is a rendered diff hunk. Lines of the previous
// implementation (a loop over the requests channel calling getNewMaps per
// user) are interleaved with lines of the new one (a goroutine that, on every
// ticker tick, fetches the "pending" beatmapset list and filters it by tracked
// mappers), without +/- markers. It does not read as one compilable function.
func RunScraper(bot *Bot, db *Db, api *Osuapi, requests chan int) {
// start timers
go startTimers(db, requests)
for userId := range requests {
log.Println("scraping user", userId)
newMaps, err := getNewMaps(db, api, userId)
if err != nil {
log.Println("err getting new maps:", err)
}
log.Println("new maps for", userId, newMaps)
if len(newMaps) > 0 {
channels := make([]string, 0)
db.IterTrackingChannels(userId, func(channelId string) error {
channels = append(channels, channelId)
// lastUpdateTime is the cutoff: only sets updated after it count as new.
lastUpdateTime := time.Now()
go func() {
// One iteration per tick of the package-level ticker.
for range ticker.C {
// build a list of currently tracked mappers
// NOTE(review): the stored value is the constant 1, so the map is used as
// a set; see the lookup further down that reads the value back.
trackedMappers := make(map[int]int)
db.IterTrackedMappers(func(userId int) error {
trackedMappers[userId] = 1
return nil
})
err := bot.NotifyNewEvent(channels, newMaps)
// TODO: is this sorted for sure??
pendingSets, err := bot.api.SearchBeatmaps("pending")
// NOTE(review): a fetch error is only logged; the tick carries on with
// whatever pendingSets holds, and lastUpdateTime is still overwritten at
// the end of the tick with newLastUpdateTime (initialised to the Unix
// epoch below).
if err != nil {
log.Println("error notifying new maps", err)
log.Println("error fetching pending sets", err)
}
}
// wait a minute and put them back into the queue
go func(id int) {
time.Sleep(refreshInterval)
requests <- id
}(userId)
}
// allNewMaps groups the newly updated sets found in this tick.
allNewMaps := make(map[int][]Beatmapset, 0)
// Tracks the most recent LastUpdated seen in this tick; starts at the epoch.
var newLastUpdateTime = time.Unix(0, 0)
for _, beatmap := range pendingSets.Beatmapsets {
updatedTime, err := time.Parse(time.RFC3339, beatmap.LastUpdated)
// NOTE(review): the log prints updatedTime rather than err, and the loop
// continues with the value returned by the failed parse (presumably the
// zero time.Time, which would hit the break below).
if err != nil {
log.Println("error parsing last updated time", updatedTime)
}
if updatedTime.After(newLastUpdateTime) {
// update lastUpdateTime to latest updated map
newLastUpdateTime = updatedTime
}
// Stop at the first set that is not newer than the previous cutoff. This
// only finds every new set if the list is ordered newest-first, which the
// TODO above leaves unconfirmed.
if !updatedTime.After(lastUpdateTime) {
break
}
// NOTE(review): mapperId is the value stored in trackedMappers (always 1),
// not beatmap.UserId, so every match is grouped under key 1 and
// IterTrackingChannels below is called with 1 — confirm whether
// beatmap.UserId was intended as the key.
if mapperId, ok := trackedMappers[beatmap.UserId]; ok {
if _, ok2 := allNewMaps[mapperId]; !ok2 {
allNewMaps[mapperId] = make([]Beatmapset, 0)
}
allNewMaps[mapperId] = append(allNewMaps[mapperId], beatmap)
}
}
if len(allNewMaps) > 0 {
log.Println("all new maps", allNewMaps)
for mapperId, newMaps := range allNewMaps {
// Collect the channels following this mapper, then notify them.
channels := make([]string, 0)
db.IterTrackingChannels(mapperId, func(channelId string) error {
channels = append(channels, channelId)
return nil
})
log.Println(newMaps)
err := bot.NotifyNewBeatmap(channels, newMaps)
if err != nil {
log.Println("error notifying new maps", err)
}
}
}
// Advance the cutoff for the next tick.
lastUpdateTime = newLastUpdateTime
log.Println("last updated time", lastUpdateTime)
}
}()
}
func getNewMaps(db *Db, api *Osuapi, userId int) (newMaps []Event, err error) {