GopherCon 2018 - Computer Vision Using Go and OpenCV 3


By Beyang Liu for the GopherCon Liveblog on August 28, 2018

Presenter: Ron Evans

Liveblogger: @beyang

An introduction to and tour of computer vision programs written in Go using GoCV—with code snippets and live demos!

Ron Evans is the creator of open source robotics projects Gobot.io and Gocv.io, and the leader of The Hybrid Group, the "software company that makes hardware companies look good."

image

What is computer vision?

Computer vision applications include

  • Motion detection
  • People recognition
  • Telepresence
  • Autonomous vehicles
  • Augmented humans (e.g., IoT, tech-enhanced surgery)

Why should you use Go for computer vision? Well, the same reasons you should use Go for everything!

  • Concurrency
  • Portability
  • Performance

How GoCV works

GO → CGO → C → C++

image

That sounds complex, but they've done all the hard work for you, so you just have to write Go.

Works on Linux, macOS, and even Windows.

The "Hello, World" of video

package main import ( "gocv.io/x/gocv"
) func main() { webcam, _ := gocv.VideoCaptureDevice(0) window := gocv.NewWindow("Hello") img := gocv.NewMat() for { webcam.Read(&img) window.IMShow(img) gocv.WaitKey(1) }
}

Demo! https://www.youtube.com/watch?v=nyk-gWpqJ8A

Into the Mat(rix)

The fundamental unit of data you manipulate in OpenCV is the Mat (short for "matrix").

image

A grayscale image is a Mat with 2 dimensions; each value is a 16-bit integer representing the intensity of the pixel.

image

An RGB color image also has 2 dimensions, and each value is a 16-bit integer, but it has 3 channels (one each for red, green, and blue).

image

A Mat can also represent other types of data, like a 3D point cloud:

image

4 applications using GoCV

Let's take a look at 4 tiny applications that represent the typical problems you might want to solve in computer vision.

Application 1: Face tracking

Well, face tracking isn't cool anymore. There are privacy concerns. So let's talk about face blurring instead :)

Haar wavelets are useful features for face detection:

image

Here's a wavelet that's good for eyes:

image

Here's one that works well for the nose:

image

Or, we could just use the gocv CascadeClassifier—easier since we've done the hard stuff for you:

package main import ( "fmt" "image" "os" "strconv" "gocv.io/x/gocv"
) func main() { if len(os.Args) < 3 { fmt.Println("How to run:\n\tfaceblur [camera ID] [classifier XML file]") return } deviceID, _ := strconv.Atoi(os.Args[1]) xmlFile := os.Args[2] webcam, err := gocv.VideoCaptureDevice(deviceID) if err != nil { fmt.Printf("error opening video capture device: %v\n", deviceID) return } defer webcam.Close() window := gocv.NewWindow("Face Blur") defer window.Close() img := gocv.NewMat() defer img.Close() classifier := gocv.NewCascadeClassifier() defer classifier.Close() classifier.Load(xmlFile) fmt.Printf("start reading camera device: %v\n", deviceID) for { if ok := webcam.Read(&img); !ok { fmt.Printf("cannot read device %d\n", deviceID) return } if img.Empty() { continue } rects := classifier.DetectMultiScale(img) fmt.Printf("found %d faces\n", len(rects)) for _, r := range rects { imgFace := img.Region(r) gocv.GaussianBlur(imgFace, &imgFace, image.Pt(75, 75), 0, 0, gocv.BorderDefault) imgFace.Close() } window.IMShow(img) if window.WaitKey(1) >= 0 { break } }
}

Demo! https://www.youtube.com/watch?v=dlP8dSl92jk

Application 2: Motion detection/tracking

  • Background subtraction
  • Mixture of Gaussian (MoG)

image

image

image

gocv.BackgroundSubtractorMOG2{}
package main import ( "fmt" "image" "image/color" "os" "strconv" "gocv.io/x/gocv"
) const MinimumArea = 3000 func main() { if len(os.Args) < 2 { fmt.Println("How to run:\n\tmotion-detect [camera ID]") return } deviceID, _ := strconv.Atoi(os.Args[1]) webcam, err := gocv.VideoCaptureDevice(int(deviceID)) if err != nil { fmt.Printf("Error opening video capture device: %v\n", deviceID) return } defer webcam.Close() window := gocv.NewWindow("Motion Window") defer window.Close() img := gocv.NewMat() defer img.Close() imgDelta := gocv.NewMat() defer imgDelta.Close() imgThresh := gocv.NewMat() defer imgThresh.Close() mog2 := gocv.NewBackgroundSubtractorMOG2() defer mog2.Close() status := "Ready" fmt.Printf("Start reading camera device: %v\n", deviceID) for { if ok := webcam.Read(&img); !ok { fmt.Printf("Error cannot read device %d\n", deviceID) return } if img.Empty() { continue } status = "Ready" statusColor := color.RGBA{0, 255, 0, 0} mog2.Apply(img, &imgDelta) gocv.Threshold(imgDelta, &imgThresh, 25, 255, gocv.ThresholdBinary) kernel := gocv.GetStructuringElement(gocv.MorphRect, image.Pt(3, 3)) defer kernel.Close() gocv.Dilate(imgThresh, &imgThresh, kernel) contours := gocv.FindContours(imgThresh, gocv.RetrievalExternal, gocv.ChainApproxSimple) for i, c := range contours { area := gocv.ContourArea(c) if area < MinimumArea { continue } status = "Motion detected" statusColor = color.RGBA{255, 0, 0, 0} gocv.DrawContours(&img, contours, i, statusColor, 2) rect := gocv.BoundingRect(c) gocv.Rectangle(&img, rect, color.RGBA{0, 0, 255, 0}, 2) } gocv.PutText(&img, status, image.Pt(10, 20), gocv.FontHersheyPlain, 1.2, statusColor, 2) window.IMShow(img) if window.WaitKey(1) == 27 { break } }
}

Demo! https://www.youtube.com/watch?v=L5k53lkzECg

Application 3: MJPEG streaming

MJPEG stands for Motion-JPEG. I.e., "how do I get my webcam to livestream on the Internet?"

We're going to use GoCV to take what's in the webcam and stream it on a webpage.

package main import ( "fmt" "log" "net/http" "os" "strconv" "github.com/hybridgroup/mjpeg" "gocv.io/x/gocv"
) var ( deviceID int err error webcam *gocv.VideoCapture stream *mjpeg.Stream
) func main() { if len(os.Args) < 3 { fmt.Println("How to run:\n\tmjpeg-streamer [camera ID] [host:port]") return } deviceID, _ = strconv.Atoi(os.Args[1]) host := os.Args[2] webcam, err = gocv.VideoCaptureDevice(deviceID) if err != nil { fmt.Printf("error opening video capture device: %v\n", deviceID) return } defer webcam.Close() stream = mjpeg.NewStream() go capture() fmt.Println("Capturing. Point your browser to " + host) http.Handle("/", stream) log.Fatal(http.ListenAndServe(host, nil))
} func capture() { img := gocv.NewMat() defer img.Close() for { if ok := webcam.Read(&img); !ok { fmt.Printf("cannot read device %d\n", deviceID) return } if img.Empty() { continue } buf, _ := gocv.IMEncode(".jpg", img) stream.UpdateJPEG(buf) }
}

Demo! https://www.youtube.com/watch?v=3Gi9-eOtJRw

Application 4: Object classification/tracking

Now let's take a look at object classification/tracking with a deep neural net—and a drone. Specifically, the Caffe Deep Learning Framework and the DJI Tello.

A neural network simulates the way a biological synapse works and allows us to solve certain types of machine learning problems that can't be solved with more classical programming methods.

The DJI Tello has an Intel chip that's optimized for performing neural network computations onboard the drone and is only $99.

image

We'll use the OpenCV face tracking SSD model:

image

package main import ( "fmt" "image" "image/color" "io" "math" "os" "os/exec" "strconv" "sync/atomic" "time" "gobot.io/x/gobot" "gobot.io/x/gobot/platforms/dji/tello" "gobot.io/x/gobot/platforms/joystick" "gocv.io/x/gocv"
) type pair struct { x float64 y float64
} const ( frameX = 400 frameY = 300 frameSize = frameX * frameY * 3 offset = 32767.0
) var ( ffmpeg = exec.Command("ffmpeg", "-hwaccel", "auto", "-hwaccel_device", "opencl", "-i", "pipe:0", "-pix_fmt", "bgr24", "-s", strconv.Itoa(frameX)+"x"+strconv.Itoa(frameY), "-f", "rawvideo", "pipe:1") ffmpegIn, _ = ffmpeg.StdinPipe() ffmpegOut, _ = ffmpeg.StdoutPipe() window = gocv.NewWindow("Tello") net *gocv.Net green = color.RGBA{0, 255, 0, 0} tracking = false detected = false detectSize = false distTolerance = 0.05 * dist(0, 0, frameX, frameY) refDistance float64 left, top, right, bottom float64 drone = tello.NewDriver("8890") flightData *tello.FlightData joyAdaptor = joystick.NewAdaptor() stick = joystick.NewDriver(joyAdaptor, "dualshock4") leftX, leftY, rightX, rightY atomic.Value
) func init() { leftX.Store(float64(0.0)) leftY.Store(float64(0.0)) rightX.Store(float64(0.0)) rightY.Store(float64(0.0)) go func() { handleJoystick() if err := ffmpeg.Start(); err != nil { fmt.Println(err) return } drone.On(tello.FlightDataEvent, func(data interface{}) { flightData = data.(*tello.FlightData) }) drone.On(tello.ConnectedEvent, func(data interface{}) { fmt.Println("Connected") drone.StartVideo() drone.SetVideoEncoderRate(tello.VideoBitRateAuto) drone.SetExposure(0) gobot.Every(100*time.Millisecond, func() { drone.StartVideo() }) }) drone.On(tello.VideoFrameEvent, func(data interface{}) { pkt := data.([]byte) if _, err := ffmpegIn.Write(pkt); err != nil { fmt.Println(err) } }) robot := gobot.NewRobot("tello", []gobot.Connection{joyAdaptor}, []gobot.Device{drone, stick}, ) robot.Start() }()
} func main() { if len(os.Args) < 5 { fmt.Println("How to run:\ngo run facetracker.go [model] [config] ([backend] [device])") return } model := os.Args[1] config := os.Args[2] backend := gocv.NetBackendDefault if len(os.Args) > 3 { backend = gocv.ParseNetBackend(os.Args[3]) } target := gocv.NetTargetCPU if len(os.Args) > 4 { target = gocv.ParseNetTarget(os.Args[4]) } n := gocv.ReadNet(model, config) if n.Empty() { fmt.Printf("Error reading network model from : %v %v\n", model, config) return } net = &n defer net.Close() net.SetPreferableBackend(gocv.NetBackendType(backend)) net.SetPreferableTarget(gocv.NetTargetType(target)) for { buf := make([]byte, frameSize) if _, err := io.ReadFull(ffmpegOut, buf); err != nil { fmt.Println(err) continue } img, _ := gocv.NewMatFromBytes(frameY, frameX, gocv.MatTypeCV8UC3, buf) if img.Empty() { continue } trackFace(&img) window.IMShow(img) if window.WaitKey(10) >= 0 { break } }
} func trackFace(frame *gocv.Mat) { W := float64(frame.Cols()) H := float64(frame.Rows()) blob := gocv.BlobFromImage(*frame, 1.0, image.Pt(300, 300), gocv.NewScalar(104, 177, 123, 0), false, false) defer blob.Close() net.SetInput(blob, "data") detBlob := net.Forward("detection_out") defer detBlob.Close() detections := gocv.GetBlobChannel(detBlob, 0, 0) defer detections.Close() for r := 0; r < detections.Rows(); r++ { confidence := detections.GetFloatAt(r, 2) if confidence < 0.5 { continue } left = float64(detections.GetFloatAt(r, 3)) * W top = float64(detections.GetFloatAt(r, 4)) * H right = float64(detections.GetFloatAt(r, 5)) * W bottom = float64(detections.GetFloatAt(r, 6)) * H left = math.Min(math.Max(0.0, left), W-1.0) right = math.Min(math.Max(0.0, right), W-1.0) bottom = math.Min(math.Max(0.0, bottom), H-1.0) top = math.Min(math.Max(0.0, top), H-1.0) detected = true rect := image.Rect(int(left), int(top), int(right), int(bottom)) gocv.Rectangle(frame, rect, green, 3) } if !tracking || !detected { return } if detectSize { detectSize = false refDistance = dist(left, top, right, bottom) } distance := dist(left, top, right, bottom) switch { case right < W/2: drone.CounterClockwise(50) case left > W/2: drone.Clockwise(50) default: drone.Clockwise(0) } switch { case top < H/10: drone.Up(25) case bottom > H-H/10: drone.Down(25) default: drone.Up(0) } switch { case distance < refDistance-distTolerance: drone.Forward(20) case distance > refDistance+distTolerance: drone.Backward(20) default: drone.Forward(0) }
} func dist(x1, y1, x2, y2 float64) float64 { return math.Sqrt((x2-x1)*(x2-x1) + (y2-y1)*(y2-y1))
} func handleJoystick() { stick.On(joystick.CirclePress, func(data interface{}) { drone.Forward(0) drone.Up(0) drone.Clockwise(0) tracking = !tracking if tracking { detectSize = true println("tracking") } else { detectSize = false println("not tracking") } }) stick.On(joystick.SquarePress, func(data interface{}) { fmt.Println("battery:", flightData.BatteryPercentage) }) stick.On(joystick.TrianglePress, func(data interface{}) { drone.ThrowTakeOff() println("Takeoff") }) stick.On(joystick.XPress, func(data interface{}) { drone.PalmLand() println("Land") }) stick.On(joystick.LeftX, func(data interface{}) { val := float64(data.(int16)) leftX.Store(val) }) stick.On(joystick.LeftY, func(data interface{}) { val := float64(data.(int16)) leftY.Store(val) }) stick.On(joystick.RightX, func(data interface{}) { val := float64(data.(int16)) rightX.Store(val) }) stick.On(joystick.RightY, func(data interface{}) { val := float64(data.(int16)) rightY.Store(val) }) gobot.Every(50*time.Millisecond, func() { rightStick := getRightStick() switch { case rightStick.y < -10: drone.Forward(tello.ValidatePitch(rightStick.y, offset)) case rightStick.y > 10: drone.Backward(tello.ValidatePitch(rightStick.y, offset)) default: drone.Forward(0) } switch { case rightStick.x > 10: drone.Right(tello.ValidatePitch(rightStick.x, offset)) case rightStick.x < -10: drone.Left(tello.ValidatePitch(rightStick.x, offset)) default: drone.Right(0) } }) gobot.Every(50*time.Millisecond, func() { leftStick := getLeftStick() switch { case leftStick.y < -10: drone.Up(tello.ValidatePitch(leftStick.y, offset)) case leftStick.y > 10: drone.Down(tello.ValidatePitch(leftStick.y, offset)) default: drone.Up(0) } switch { case leftStick.x > 20: drone.Clockwise(tello.ValidatePitch(leftStick.x, offset)) case leftStick.x < -20: drone.CounterClockwise(tello.ValidatePitch(leftStick.x, offset)) default: drone.Clockwise(0) } })
} func getLeftStick() pair { s := pair{x: 0, y: 0} s.x = leftX.Load().(float64) s.y = leftY.Load().(float64) return s
} func getRightStick() pair { s := pair{x: 0, y: 0} s.x = rightX.Load().(float64) s.y = rightY.Load().(float64) return s
}

Demo! https://www.youtube.com/watch?v=XnMoUCby1v0

Wrap-up

Check out GoCV.io or follow the Twitter account @GoCVio!