SlideShare a Scribd company logo
!
Crunching data with go:
Tips, tricks, use-cases
Sergii Khomenko, Data Scientist, STYLIGHT
sergii.khomenko@stylight.com @lc0d3r
MUNICH GOPHERS - APR 24 2014, MUNICH
Agenda
Relational databases
Google Analytics and BigQuery
Geolocation
Useful things from Go-world
WHAT IT’S ABOUT
Relational databases
• github.com/jmoiron/sqlx
// Clickout is one row of the click-out query result; it is filled by
// rows.StructScan in the query loop.
//
// NOTE(review): the spelling "Commision" is kept on purpose. Renaming the
// field would presumably change the column sqlx maps it to — confirm against
// the query before correcting it.
type Clickout struct {
	Id, Count                int
	Ip                       string
	Type                     int
	Commision, Eu_commission float32
}
! db, err := sqlx.Connect(config.Database.Driver, fmt.Sprintf("%s:%s@%s(%s)/%s?
parseTime=true", config.Database.Username,!
! ! config.Database.Password, config.Database.Protocol, config.Database.Server,
config.Database.Database))!
!! fmt.Printf("Connect to %s:(%s)... n", config.Database.Protocol,
config.Database.Server)!
! if err != nil {!
! ! log.Fatalf("Can not connect to the mysql server - %s", err)!
! ! return!
! }!
! defer db.Close()!
!!
! dbParams := paramStruct{"start": arguments["<from>"].(string) + " 00:00:00", "end":
arguments["<to>"].(string) + " 23:59:59"}!
! geoParams := paramStruct{}!
!! siteStr, _ := arguments["--site"].(string)!
! if siteInt, err2 := strconv.Atoi(siteStr); err2 == nil {!
! ! dbParams["site"] = siteInt!
! }!
!! query := getClickoutsQuery(dbParams)!
! rows, err := db.Queryx(query)
! if err == nil {!
! ! for rows.Next() {!
! ! ! click := Clickout{}!
!! ! ! err2 := rows.StructScan(&click)!
! ! ! if err2 == nil {!
! ! ! ! task <- click!
!! ! ! } else {!
! ! ! ! fmt.Println(err2)!
! ! ! }!
! ! }!
! ! close(task)!
! } else {!
! ! log.Fatalf("SQL Error - %s", err)!
! }!
Geolocation
WHERE MY IPS ARE FROM
! task := make(chan Clickout)!
! result := make(chan IpResult)!
! done = make(chan interface{})!
!! go processChannel(task, result)!
! go aggregateResults(result, &results)!
!! if err == nil {!
! ! for rows.Next() {!
! ! ! click := Clickout{}!
!! ! ! err2 := rows.StructScan(&click)!
! ! ! if err2 == nil {!
! ! ! ! task <- click!
!! ! ! } else {!
! ! ! ! fmt.Println(err2)!
! ! ! }!
! ! }!
! ! close(task)!
! } else {!
! ! log.Fatalf("SQL Error - %s", err)!
! }
func processChannel(tc chan Clickout, rc chan IpResult) {!
! for click := range tc {!
! ! if subnet, err := findNetwork(click.Ip); err == nil {!
! ! ! rc <- IpResult{click, subnet}!
! ! } else {!
! ! ! rc <- IpResult{click, new(IpSubnet)}!
! ! }!
! }!
! close(rc)!
}!
func aggregateResults(rc chan IpResult, rs *map[string]*AggrResults) {!
! results := *rs!
! found, notFound := 0, 0!
!! for result := range rc {!
! ! if result.Subnet.startInt == 0 {!
! ! ! notFound += result.click.Count!
! ! ! log.Printf("Can not find ip %sn", result.click.Ip)!
! ! } else {!
! ! ! found += result.click.Count!
! ! ! log.Printf("%s is {%s - %s} n", result.click.Ip,!
! ! ! ! result.Subnet.startIp, result.Subnet.endIp)!
!! ! ! AddResult(&results, result)!
! ! }!
! }!
! fmt.Printf("%f (%d) IPs in GeoIP db and %f (%d) not found out of %dn",
float32(found)/float32(found+notFound),!
! ! found, float32(notFound)/float32(found+notFound), notFound, found+notFound)!
!! close(done)!
}!
!
package main!
!import (!
! "fmt"!
! "runtime"!
)!
// main prints the GOMAXPROCS setting, the CPU count and the goroutine count,
// raises GOMAXPROCS to the number of CPUs, and prints the same three values
// again.
func main() {
	// GOMAXPROCS(0) only queries the current value without changing it.
	fmt.Printf("GOMAXPROCS is %d %d %d\n", runtime.GOMAXPROCS(0), runtime.NumCPU(),
		runtime.NumGoroutine())

	// NOTE: since Go 1.5 GOMAXPROCS already defaults to the number of CPUs,
	// so this call only has an effect on older toolchains.
	runtime.GOMAXPROCS(runtime.NumCPU())
	fmt.Printf("GOMAXPROCS is %d %d %d\n", runtime.GOMAXPROCS(0), runtime.NumCPU(),
		runtime.NumGoroutine())
}
! db, err := geoip2.Open("data/GeoLite2-City.mmdb")!
! if err != nil {!
! ! panic(err)!
! }!
! !
! ip := net.ParseIP("81.2.69.142")!
! record, err := db.City(ip)!
! if err != nil {!
! ! panic(err)!
! }!
!! fmt.Printf("Portuguese (BR) city name: %vn", record.City.Names["pt-BR"])!
! fmt.Printf("English subdivision name: %vn", record.Subdivisions[0].Names["en"])!
! fmt.Printf("Russian country name: %vn", record.Country.Names["ru"])!
! fmt.Printf("ISO country code: %vn", record.Country.IsoCode)!
! fmt.Printf("Time zone: %vn", record.Location.TimeZone)!
! fmt.Printf("Coordinates: %v, %vn", record.Location.Latitude,
record.Location.Longitude)!
!! db.Close()
Google Analytics and BigQuery
var config = &oauth.Config{!
! ClientId: “client-id-here.apps.googleusercontent.com",!
! ClientSecret: “client-secret-here“,!
! Scope: "https://www.googleapis.com/auth/analytics.readonly",!
! AuthURL: "https://accounts.google.com/o/oauth2/auth",!
! TokenURL: "https://accounts.google.com/o/oauth2/token",!
}
! oauthHttpClient := getOAuthClient(config)!
! analyticsService, err := analytics.New(oauthHttpClient)!
! if err != nil {!
! ! log.Fatal("Failed to create GA service")!
! }!
!! dataService := analytics.NewDataGaService(analyticsService)!
! dataGaGetCall := dataService.Get(gaId, start, end, metrics)
! data, err := dataGaGetCall.Do()!
! if err != nil {!
! ! log.Fatal("Failed fetch data from GA")!
! }!
!! return data.Rows
func main() {!
! gaOptions := map[string]string{!
! ! "dimensions": "ga:region,ga:city",!
! ! "sort": "-ga:visits",!
! ! "limit": "10",!
! }!
! rows := fetchGAData(config, "ga:11781168", "2014-04-06", "2014-04-06", !
"ga:visits", gaOptions)!
!! for row := 0; row <= len(rows)-1; row++ {!
! ! fmt.Printf("row=%d %vn", row, rows[row])!
! }!
}
Crunching data with go: Tips, tricks, use-cases
! config := &oauth.Config{!
! ! ClientId: "client-id-here.apps.googleusercontent.com",!
! ! ClientSecret: "client-secret-here",!
! ! Scope: bigquery.BigqueryScope,!
! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",!
! ! TokenURL: "https://accounts.google.com/o/oauth2/token",!
! }!
!
! transport := &oauth.Transport{!
! ! Token: token,!
! ! Config: config,!
! }!
! client := transport.Client()
! service, err := bigquery.New(client)!
! if err != nil {!
! ! panic(err)!
! }!
!
! datasetList, err := service.Datasets.List(“testing-project").Do()!
! if err != nil {!
! ! panic(err)!
! }!
!
! for _, d := range datasetList.Datasets {!
! ! fmt.Println(d.FriendlyName)!
! }!
Useful and interesting Gophers
Interesting Gophers
• Golang machine learning lib 

https://github.com/xlvector/hector
• Logistic Regression
• Factorized Machine
• CART, Random Forest, Random Decision Tree,
Gradient Boosting Decision Tree
• Neural Network
Interesting Gophers
• library for numeric operation

https://github.com/gonum - fairly new, but they are working
to bring some useful packages
• matrix - Scientific math package for the Go
language.
• graph - Discrete math structures and functions
Reference list
• Why are ‘Cool Kids’ at Github Moving to GO
Language? - http://www.homolog.us/blogs/blog/2014/01/16/golang/
• How suitable Go will be for scientific computing? -
https://groups.google.com/forum/#!topic/golang-nuts/_VoZfniBTZE
Thank you!
MUNICH GOPHERS - APR 24 2014, MUNICH
MUNICH GOPHERS - APR 24 2014, MUNICH
Sergii Khomenko,
Data Scientist
STYLIGHT GmbH
sergii.khomenko@stylight.com
@lc0d3r

STYLIGHT.COM
DAHO.AM — Developer Conference 06-06-14
SAVE THE DATE

More Related Content

Crunching data with go: Tips, tricks, use-cases

  • 1. ! Crunching data with go: Tips, tricks, use-cases S e r g i i K h o m e n k o , D a t a S c i e n t i s t , S T Y L I G H T s e r g i i . k h o m e n k o @ s t y l i g h t . c o m @ l c 0 d 3 r M U N I C H G O P H E R S - A P R 2 4 2 0 1 4 , M U N I C H
  • 2. Agenda Relational databases ! Google Analytics and BigQuery ! Geolocation ! Useful things from Go-world W H A T I T ’ S A B O U T
  • 4. • github.com/jmoiron/sqlx type Clickout struct {! ! Id, Count int! ! Ip string! ! Type int! ! Commision, Eu_commission float32! }
  • 5. ! db, err := sqlx.Connect(config.Database.Driver, fmt.Sprintf("%s:%s@%s(%s)/%s? parseTime=true", config.Database.Username,! ! ! config.Database.Password, config.Database.Protocol, config.Database.Server, config.Database.Database))! !! fmt.Printf("Connect to %s:(%s)... n", config.Database.Protocol, config.Database.Server)! ! if err != nil {! ! ! log.Fatalf("Can not connect to the mysql server - %s", err)! ! ! return! ! }! ! defer db.Close()! !!
  • 6. ! dbParams := paramStruct{"start": arguments["<from>"].(string) + " 00:00:00", "end": arguments["<to>"].(string) + " 23:59:59"}! ! geoParams := paramStruct{}! !! siteStr, _ := arguments["--site"].(string)! ! if siteInt, err2 := strconv.Atoi(siteStr); err2 == nil {! ! ! dbParams["site"] = siteInt! ! }! !! query := getClickoutsQuery(dbParams)! ! rows, err := db.Queryx(query)
  • 7. ! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }!
  • 8. Geolocation W H E R E M Y I P S A R E F R O M
  • 9. ! task := make(chan Clickout)! ! result := make(chan IpResult)! ! done = make(chan interface{})! !! go processChannel(task, result)! ! go aggregateResults(result, &results)! !! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }
  • 10. func processChannel(tc chan Clickout, rc chan IpResult) {! ! for click := range tc {! ! ! if subnet, err := findNetwork(click.Ip); err == nil {! ! ! ! rc <- IpResult{click, subnet}! ! ! } else {! ! ! ! rc <- IpResult{click, new(IpSubnet)}! ! ! }! ! }! ! close(rc)! }!
  • 11. func aggregateResults(rc chan IpResult, rs *map[string]*AggrResults) {! ! results := *rs! ! found, notFound := 0, 0! !! for result := range rc {! ! ! if result.Subnet.startInt == 0 {! ! ! ! notFound += result.click.Count! ! ! ! log.Printf("Can not find ip %sn", result.click.Ip)! ! ! } else {! ! ! ! found += result.click.Count! ! ! ! log.Printf("%s is {%s - %s} n", result.click.Ip,! ! ! ! ! result.Subnet.startIp, result.Subnet.endIp)! !! ! ! AddResult(&results, result)! ! ! }! ! }! ! fmt.Printf("%f (%d) IPs in GeoIP db and %f (%d) not found out of %dn", float32(found)/float32(found+notFound),! ! ! found, float32(notFound)/float32(found+notFound), notFound, found+notFound)! !! close(done)! }! !
  • 12. package main! !import (! ! "fmt"! ! "runtime"! )! !func main() {! !! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !! runtime.GOMAXPROCS(runtime.NumCPU())! ! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !}!
  • 13. ! db, err := geoip2.Open("data/GeoLite2-City.mmdb")! ! if err != nil {! ! ! panic(err)! ! }! ! ! ! ip := net.ParseIP("81.2.69.142")! ! record, err := db.City(ip)! ! if err != nil {! ! ! panic(err)! ! }! !! fmt.Printf("Portuguese (BR) city name: %vn", record.City.Names["pt-BR"])! ! fmt.Printf("English subdivision name: %vn", record.Subdivisions[0].Names["en"])! ! fmt.Printf("Russian country name: %vn", record.Country.Names["ru"])! ! fmt.Printf("ISO country code: %vn", record.Country.IsoCode)! ! fmt.Printf("Time zone: %vn", record.Location.TimeZone)! ! fmt.Printf("Coordinates: %v, %vn", record.Location.Latitude, record.Location.Longitude)! !! db.Close()
  • 15. var config = &oauth.Config{! ! ClientId: “client-id-here.apps.googleusercontent.com",! ! ClientSecret: “client-secret-here“,! ! Scope: "https://www.googleapis.com/auth/analytics.readonly",! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! }
  • 16. ! oauthHttpClient := getOAuthClient(config)! ! analyticsService, err := analytics.New(oauthHttpClient)! ! if err != nil {! ! ! log.Fatal("Failed to create GA service")! ! }! !! dataService := analytics.NewDataGaService(analyticsService)! ! dataGaGetCall := dataService.Get(gaId, start, end, metrics)
  • 17. ! data, err := dataGaGetCall.Do()! ! if err != nil {! ! ! log.Fatal("Failed fetch data from GA")! ! }! !! return data.Rows
  • 18. func main() {! ! gaOptions := map[string]string{! ! ! "dimensions": "ga:region,ga:city",! ! ! "sort": "-ga:visits",! ! ! "limit": "10",! ! }! ! rows := fetchGAData(config, "ga:11781168", "2014-04-06", "2014-04-06", ! "ga:visits", gaOptions)! !! for row := 0; row <= len(rows)-1; row++ {! ! ! fmt.Printf("row=%d %vn", row, rows[row])! ! }! }
  • 20. ! config := &oauth.Config{! ! ! ClientId: "client-id-here.apps.googleusercontent.com",! ! ! ClientSecret: "client-secret-here",! ! ! Scope: bigquery.BigqueryScope,! ! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! ! }! ! ! transport := &oauth.Transport{! ! ! Token: token,! ! ! Config: config,! ! }! ! client := transport.Client()
  • 21. ! service, err := bigquery.New(client)! ! if err != nil {! ! ! panic(err)! ! }! ! ! datasetList, err := service.Datasets.List(“testing-project").Do()! ! if err != nil {! ! ! panic(err)! ! }! ! ! for _, d := range datasetList.Datasets {! ! ! fmt.Println(d.FriendlyName)! ! }!
  • 23. Interesting Gophers • Golang machine learning lib 
 https://github.com/xlvector/hector • Logistic Regression • Factorized Machine • CART, Random Forest, Random Decision Tree, Gradient Boosting Decision Tree • Neural Network
  • 24. Interesting Gophers • library for numeric operation
 https://github.com/gonum - fairly, but they are working to bring some useful packages • matrix - Scientific math package for the Go language. • graph - Discrete math structures and functions
  • 25. Reference list • Why are ‘Cool Kids’ at Github Moving to GO Language? - http://www.homolog.us/blogs/blog/ 2014/01/16/golang/ • How suitable Go will be for scientific computing? - https://groups.google.com/forum/#!topic/golang- nuts/_VoZfniBTZE
  • 26. Thank you! M U N I C H G O P H E R S - A P R 2 4 2 0 1 4 , M U N I C H
  • 27. M U N I C H G O P H E R S - A P R 2 4 2 0 1 4 , M U N I C H S e r g i i K h o m e n k o , D a t a S c i e n t i s t S T Y L I G H T G m b H s e r g i i . k h o m e n k o @ s t y l i g h t . c o m @ l c 0 d 3 r ! S T Y L I G H T . C O M
  • 28. DAHO.AM — Developer Conference 06-06-14 S A F E T H E D A T E