Thanks, I wonder if it is currently impossible to make it as fast as the nested for loops, ie that some optimizer improvement could fix it. Here's a stripped down version of my code:
import QuartzCore // This is just for timing using CACurrentMediaTime() struct Point2DInt { var x: Int var y: Int } struct Size2DInt { var width: Int var height: Int var rect: Rect2DInt { return Rect2DInt(x: 0, y: 0, width: width, height: height) } } struct Rect2DInt : Sequence { var description: String { return "(\(origin), \(size))" } var origin: Point2DInt var size: Size2DInt var minX: Int { get { return origin.x } set { size.width = maxX - newValue; origin.x = newValue } } var maxX: Int { get { return origin.x + size.width } set { size.width = newValue - minX } } var minY: Int { get { return origin.y } set { size.height = maxY - newValue; origin.y = newValue } } var maxY: Int { get { return origin.y + size.height } set { size.height = newValue - minY } } init(origin: Point2DInt, size: Size2DInt) { self.origin = origin self.size = size } init(x: Int, y: Int, width: Int, height: Int) { self.init(origin: Point2DInt(x: x, y: y), size: Size2DInt(width: width, height: height)) } init(minX: Int, minY: Int, maxX: Int, maxY: Int) { self.init(origin: Point2DInt(x: minX, y: minY), size: Size2DInt(width: maxX - minX, height: maxY - minY)) } func makeIterator() -> Rect2DIntPointIterator { return Rect2DIntPointIterator(rect: self) } } // This is the crucial type here, this version is the fastest that // I could find, but it's still slower than two nested for loops, // see test below. struct Rect2DIntPointIterator : IteratorProtocol, Sequence { let startX, startY, stopX, stopY: Int var currentPoint: Point2DInt init(rect: Rect2DInt) { currentPoint = rect.origin startX = rect.origin.x startY = rect.origin.y stopX = rect.maxX stopY = rect.maxY } mutating func next() -> Point2DInt? { defer { currentPoint.x = currentPoint.x &+ 1 } if currentPoint.x == stopX { currentPoint.x = startX currentPoint.y = currentPoint.y &+ 1 if currentPoint.y == stopY { return nil } } return currentPoint } } struct Table2D<Element> { let size: Size2DInt var storage: [Element] init(size: Size2DInt, filledWith element: Element) { precondition(size.width > 0 && size.height > 0) self.size = size self.storage = [Element](repeating: element, count: size.width * size.height) } subscript(x: Int, y: Int) -> Element { get { precondition(x >= 0 && y >= 0 && x < size.width && y < size.height) return storage[x + y * size.width] } set { precondition(x >= 0 && y >= 0 && x < size.width && y < size.height) storage[x + y * size.width] = newValue } } subscript(position: Point2DInt) -> Element { get { return self[position.x, position.y] } set { self[position.x, position.y] = newValue } } } func randomDouble() -> Double { // Returns a random Double in the range [0, 1) let ui64 = (UInt64(arc4random()) << 32) | UInt64(arc4random()) return Double(ui64 >> UInt64(63 - Double.significandBitCount)) * .ulpOfOne/2 } func randomInt(from: Int, to: Int) -> Int { // Returns an Int in the range [from, to) return Int(Double(from) + (randomDouble() * Double(to - from)).rounded(.down)) } func randomSubrects(of rect: Rect2DInt, minSize: Size2DInt, count: Int) -> [Rect2DInt] { precondition(count > 0 && minSize.width > 0 && minSize.height > 0) var subrects = [Rect2DInt]() subrects.reserveCapacity(count) for _ in 0 ..< count { let size = Size2DInt(width: randomInt(from: minSize.width, to: rect.size.width), height: randomInt(from: minSize.height, to: rect.size.height)) let origin = Point2DInt(x: randomInt(from: 0, to: rect.size.width - size.width), y: randomInt(from: 0, to: rect.size.height - size.height)) subrects.append(Rect2DInt(origin: origin, size: size)) } return subrects } func randomTable(size: Size2DInt) -> Table2D<Double> { var table = Table2D(size: size, filledWith: 0.0) for p in table.size.rect { table[p] = randomDouble() } return table } func sum1(areas: [Rect2DInt], of table: Table2D<Double>) -> Double { var sum = 0.0 for r in areas { // Using custom iterator: for p in r { sum += table[p] } } return sum } func sum2(areas: [Rect2DInt], of table: Table2D<Double>) -> Double { var sum = 0.0 for r in areas { // Using two nested for loops: for y in r.minY ..< r.maxY { for x in r.minX ..< r.maxX { sum = sum + table[x, y] } } } return sum } func test( sumFn: ([Rect2DInt], Table2D<Double>) -> Double, label: String, table: Table2D<Double>, areas: [Rect2DInt] ) { let t0 = CACurrentMediaTime() let sum = sumFn(areas, table) let t1 = CACurrentMediaTime() print(label, t1 - t0, "seconds (sum \(sum))") } for _ in 0 ..< 4 { let rndTable = randomTable(size: Size2DInt(width: 1000, height: 1000)) let rndTableAreas = randomSubrects(of: rndTable.size.rect, minSize: Size2DInt(width: 100, height: 100), count: 1000) test(sumFn: sum1, label: "sum1 - using custom iterator ", table: rndTable, areas: rndTableAreas) test(sumFn: sum2, label: "sum2 - using nested for-loops ", table: rndTable, areas: rndTableAreas) print() } // // Typical output on my MacBook Pro (Retina, 15-inch, Late 2013): // // sum1 - using custom iterator 0.480134483019356 seconds (sum 153408603.850653) // sum2 - using nested for-loops 0.348341046017595 seconds (sum 153408603.850653) // // sum1 - using custom iterator 0.426998238021042 seconds (sum 149851816.622638) // sum2 - using nested for-loops 0.34111139801098 seconds (sum 149851816.622638) // // sum1 - using custom iterator 0.466021075990284 seconds (sum 155267702.297466) // sum2 - using nested for-loops 0.351970263000112 seconds (sum 155267702.297466) // // sum1 - using custom iterator 0.426723245007452 seconds (sum 146331850.202214) // sum2 - using nested for-loops 0.340267747989856 seconds (sum 146331850.202214) // On Wed, Jan 4, 2017 at 12:42 PM, Karl <razie...@gmail.com> wrote: > Hmmm that’s interesting. A brief test I ran: > > import CoreGraphics > import Foundation > > struct PointIterator { > let rect: CGRect > var nextPoint: CGPoint > > let maxX: CGFloat > let maxY: CGFloat > > init(rect: CGRect) { > self.rect = rect.standardized > self.nextPoint = self.rect.origin > // Cache for fast iteration > self.maxX = self.rect.maxX > self.maxY = self.rect.maxY > } > > mutating func next() -> CGPoint? { > guard nextPoint.x <= maxX, nextPoint.y <= maxY else { > return .none > } > defer { > nextPoint.x += 1 > if nextPoint.x > maxX { > nextPoint.x = rect.origin.x > nextPoint.y += 1 > } > } > return nextPoint > } > } > > // Use iterator > func iteratePoints_it(_ rect: CGRect, with: (CGPoint)->()) { > var it = PointIterator(rect: rect) > while let point = it.next() { > with(point) > } > } > > // Basic unwrapping of the iterator as a function (no ‘defer’) > func iteratePoints_fe(_ rect: CGRect, with: (CGPoint)->()) { > let rect = rect.standardized > var nextPoint = rect.origin > let maxX = rect.maxX > let maxY = rect.maxY > > while true { > guard nextPoint.x <= maxX, nextPoint.y <= maxY else { > return > } > with(nextPoint) > nextPoint.x += 1 > if nextPoint.x > maxX { > nextPoint.x = rect.origin.x > nextPoint.y += 1 > } > } > } > > // for..in loop > func iteratePoints_fe2(_ rect: CGRect, with: (CGPoint)->()) { > let rect = rect.standardized > let maxX = rect.maxX > let maxY = rect.maxY > for y in stride(from: rect.origin.y, to: maxY, by: 1) { > for x in stride(from: rect.origin.x, to: maxX, by: 1) { > with(CGPoint(x: x, y: y)) > } > } > } > > func profile(_ iterations: Int, _ thing: ()->()) -> TimeInterval { > var totalTime: TimeInterval = 0 > for _ in 0..<iterations { > let start = Date().timeIntervalSinceReferenceDate > thing() > totalTime += (Date().timeIntervalSinceReferenceDate - start) > } > return totalTime/TimeInterval(iterations) > } > > > let bigRect = CGRect(x: 0, y: 0, width: 10_000, height: 10_000) > > let iterator = profile(10) { iteratePoints_it(bigRect) { if $0.x > > 1_000_000 { print("?") } } } // always false, won't be optimised out. > let foreach = profile(10) { iteratePoints_fe(bigRect) { if $0.x > > 1_000_000 { print("?") } } } > let foreach2 = profile(10) { iteratePoints_fe2(bigRect) { if $0.x > > 1_000_000 { print("?") } } } > print("iterator: \(iterator) \nforeach: \(foreach) \nforeach2: > \(foreach2)") > > > Results: > > iterator: 0.316907703876495 > foreach: 0.283202117681503 > foreach2: 0.568318998813629 > > That ranking is consistent, too. Using an iterator does appear marginally > slower than a basic unwrapping of the iterator in to a function. > > On 4 Jan 2017, at 09:56, Jens Persson via swift-users < > swift-users@swift.org> wrote: > > Hi, > > I'm working on some low-level pixel processing code (stuff that is not > possible to do using standard API or on eg GPU), and I had lots of eg these: > > for y in someStartY ..< someStopY { > for x in someStartX ..< someStopX { > ... pixels[x, y] ... > } > } > > So I implemented some (value) types like eg IntPoint2D, IntSize2D, > IntRect2D and I made an IntRect2DIterator so that IntRect2D could be a > Sequence over its (discrete) points. With this I could rewrite the above > like so: > > for pixelPosAsIntPoint2D in someIntRect2D { > ... pixels[pixelPosAsIntPoint2D] ... > } > > For some reason the first version (two nested for loops for x and y) is > always a bit faster than the abstracted version no matter how I write it > (tried using eg &+ instead of + etc). > > > > Is it possible to write as a zero cost abstraction like this, if so, how? > If not, why? > > > > > PS > > Note that eg this: > > for y in someStartY ..< someStopY { > for x in someStartX ..< someStopX { > let pixelPosAsIntPoint2D = IntPoint2D(x: x, y: y) > ... pixels[pixelPosAsIntPoint2D] ... > } > } > > is exactly as fast as the top example (using just ... pixels[x, y] ...). > So the difference in execution time seems to be due to something in the > Iterator and not eg the pixel accessing subscript taking the 2d int point > type instead of separate x and y ints. > > Here is one Iterator variant that I have tested: > > struct Rect2DIntPointIterator : IteratorProtocol, Sequence { > let startX, startY, stopX, stopY: Int > var px, py: Int > init(rect: Rect2DInt) { > startX = rect.origin.x > startY = rect.origin.y > stopX = rect.maxX > stopY = rect.maxY > px = startX > py = startY > } > mutating func next() -> Point2DInt? { > defer { px = px &+ 1 } > if px == stopX { > px = startX > py = py &+ 1 > if py == stopY { return nil } > } > return Point2DInt(x: px, y: py) > } > } > > > And here are typical execution times from an example test: > 2.1 seconds using my Iterator (the fastest I can get it, no matter how I > try to rewrite it). > 1.5 seconds using nested x, y for loops. > > I'm pretty sure my testing is done thoroughly (measuring average of many > runs, using random data, avoiding dead code elimination, whole module > optimization, etc). > > I have tried profiling the code and looking at the disassmbly but I'm > failing to understand what's going on. > > So the ultimate answer would be in the form of a (2d, Int) Rectangle type > whose (2d, Int) Points can be iterated in a for loop, at zero cost compared > to doing the same using two nested for loops. Or an explanation of why this > is impossible. > > DS > > /Jens > > _______________________________________________ > swift-users mailing list > swift-users@swift.org > https://lists.swift.org/mailman/listinfo/swift-users > > >
_______________________________________________ swift-users mailing list swift-users@swift.org https://lists.swift.org/mailman/listinfo/swift-users