tobecontinued commented on a change in pull request #17754:
URL: https://github.com/apache/incubator-mxnet/pull/17754#discussion_r419449575
########## File path: tests/python/unittest/test_higher_order_grad.py ##########
```diff
@@ -570,6 +571,290 @@ def check_nth_order_unary(x, op, grad_ops, orders, rtol=None, atol=None):
         assert_almost_equal(
             expected_grad, computed_grad.asnumpy(), rtol=rtol, atol=atol)
+@with_seed()
+def test_elemwise_sub():
+    def sub(inputs):
+        return nd.elemwise_sub(inputs[0], inputs[1])
+    def grad_op(inputs):
+        return [nd.ones_like(inputs[0]), nd.negative(nd.ones_like(inputs[1]))]
+    def grad_grad_op(inputs):
+        return [nd.zeros_like(inputs[0]), nd.zeros_like(inputs[1])]
+
+    for dim in range(1, 5):
+        shape = rand_shape_nd(dim)
+        x, y = random_arrays(shape, shape)
+        check_nth_order_binary([x, y], sub, [grad_op, grad_grad_op], [1, 2])
+
+@with_seed()
+def test_elemwise_mul():
+    def mul(inputs):
+        return nd.elemwise_mul(inputs[0], inputs[1])
+    def grad_op(inputs):
+        return [inputs[1], inputs[0]]
+    def grad_grad_op(inputs):
+        return [nd.zeros_like(inputs[0]), nd.zeros_like(inputs[1])]
+
+    for dim in range(1, 5):
+        shape = rand_shape_nd(dim)
+        x, y = random_arrays(shape, shape)
+        check_nth_order_binary([x, y], mul, [grad_op, grad_grad_op], [1, 2])
+
+@with_seed()
+def test_power():
+    def power(inputs):
+        return nd.power(inputs[0], inputs[1])
+
+    def grad_op(inputs):
+        x, y = inputs
+        return [y * nd.power(x, y - 1), nd.power(x, y) * nd.log(x)]
+
+    def grad_grad_op(inputs):
+        x, y = inputs
+        return [y * (y - 1) * nd.power(x, y - 2), nd.power(x, y) * (nd.log(x) ** 2)]
+
+    def grad_grad_grad_op(inputs):
+        x, y = inputs
+        return [y * (y - 1) * (y - 2) * nd.power(x, y - 3), nd.power(x, y) * (nd.log(x) ** 3)]
+
+    low = 1.0
+    high = 3.0
+    for dim in range(1, 5):
+        shape = rand_shape_nd(dim)
+        x = nd.random.uniform(low, high, shape)
+        y = nd.random.uniform(low, high, shape)
+        check_nth_order_binary([x, y], power, [grad_op, grad_grad_op, grad_grad_grad_op], [1, 2, 3])
+
+# based on gen_broadcast_data in test_operation.py
+def gen_broadcast_shape(idx):
+    # Manually set test cases
+    binary_op_data_shape = nd.array(
+        [[[2, 5, 1, 30, 7], [1, 5, 448, 30, 1]],
+         [[10, 49, 1, 77, 17], [10, 1, 2, 1, 17]],
+         [[13, 2, 65, 2, 1], [13, 1, 65, 1, 225]],
+         [[9, 434, 4, 2, 37], [9, 1, 4, 1, 37]],
+         [[2, 52, 1, 4, 1], [1, 52, 60, 1, 37]],
+         [[1, 23, 7, 122, 50], [2, 1, 7, 1, 50]],
+         [[1, 17, 1, 5, 1], [22, 1, 2, 1, 28]],
+         [[29, 1, 2, 1, 8], [29, 22, 1, 130, 1]],
+         [[2, 36, 1, 427, 3], [1, 36, 11, 427, 1]],
+         [[1, 2, 1, 100, 7], [1, 2, 448, 100, 1]],
+         [[1, 2, 495, 77, 7], [1, 2, 1, 1, 7]],
+         [[1, 43, 65, 2, 1], [1, 43, 65, 1, 225]],
+         [[1, 92, 434, 2, 2], [1, 92, 1, 2, 2]],
+         [[1, 92, 1, 4, 1], [1, 92, 134, 1, 17]],
+         [[1, 53, 2, 122, 143], [1, 1, 2, 1, 143]],
+         [[1, 179, 1, 87, 17], [1, 179, 1, 1, 17]],
+         [[1, 1, 17, 5, 1], [1, 22, 1, 1, 28]],
+         [[1, 2, 1, 1, 8], [1, 2, 52, 430, 1]],
+         [[1, 163, 1, 22, 3], [1, 163, 116, 22, 1]],
+         [[1, 1, 44, 30, 7], [1, 1, 44, 30, 1]],
+         [[1, 1, 1, 1, 28], [1, 127, 1, 5, 28]],
+         [[1, 2, 394, 38, 1], [1, 2, 394, 38, 16]],
+         [[1, 10, 49, 77, 17], [1, 1, 1, 1, 17]],
+         [[1, 431, 6, 2, 225], [1, 1, 6, 2, 225]],
+         [[1, 15, 1, 28, 1], [1, 15, 1, 28, 463]],
+         [[1, 129, 2, 48, 96], [1, 129, 2, 1, 1]],
+         [[1, 1, 403, 17, 2], [1, 44, 403, 17, 2]],
+         [[1, 1, 65, 2, 22], [1, 1, 65, 1, 1]],
+         [[1, 24, 103, 17, 18], [1, 24, 1, 1, 1]],
+         [[1, 1, 1, 1, 2], [1, 24, 194, 50, 1]],
+         [[1, 1, 107, 84, 9], [1, 1, 1, 1, 1]]])
+    if idx < binary_op_data_shape.shape[0]:
+        l_shape = binary_op_data_shape[idx][0]
+        r_shape = binary_op_data_shape[idx][1]
+    else:
+        # Generate random data that has ndim between 1-7 and all the shape dims between 1-5
+        ndim = nd.random.randint(1, 6)
+        shape = nd.random.randint(1, 6, size=(ndim,))
+        l_same_dim = nd.random.randint(0, 5)
+        r_same_dim = nd.random.randint(0, 5)
+        l_axis_flags = nd.random.randint(0, 2, size=ndim)
+        r_axis_flags = nd.random.randint(0, 2, size=ndim)
+        if l_same_dim == 4:
+            l_axis_flags = nd.ones(ndim)
+        if r_same_dim == 4:
+            r_axis_flags = nd.ones(ndim)
+        l_shape = shape.copy()
+        r_shape = shape.copy()
+        l_shape[nd.where(l_axis_flags == 0)] = 1
+        r_shape[nd.where(r_axis_flags == 0)] = 1
+    return tuple(l_shape.asnumpy().astype(int)), tuple(r_shape.asnumpy().astype(int))
+
+# from test_operation.py
+def reduce_op(shape, x):
+    if shape == x.shape:
+        return x
+    keepdims_shape = list(x.shape)
+    for i in range(len(shape)):
+        if x.shape[i] != shape[i]:
+            keepdims_shape[i] = 1
+            x = nd.sum(x, axis=i).reshape(keepdims_shape)
+    return x
+
+@with_seed()
+def test_broadcast_power():
+    def broadcast_power(inputs):
+        return nd.broadcast_power(inputs[0], inputs[1])
+
+    def unreduced_grad_op(inputs):
+        x, y = inputs
+        return [y * nd.broadcast_power(x, y - 1), nd.broadcast_power(x, y) * nd.log(x)]
+
+    def unreduced_grad_grad_op(inputs):
+        x, y = inputs
+        return [y * (y - 1) * nd.broadcast_power(x, y - 2), nd.broadcast_power(x, y) * (nd.log(x) ** 2)]
+
+    def unreduced_grad_grad_grad_op(inputs):
+        x, y = inputs
+        return [y * (y - 1) * (y - 2) * nd.broadcast_power(x, y - 3), nd.broadcast_power(x, y) * (nd.log(x) ** 3)]
+
+    low = 1.0
+    high = 3.0
+    for dim in range(1, 5):
+        x_shape, y_shape = gen_broadcast_shape(dim)
+        x = nd.random.uniform(low, high, x_shape)
+        y = nd.random.uniform(low, high, y_shape)
+
+        check_nth_order_binary([x, y], broadcast_power, [unreduced_grad_op, unreduced_grad_grad_op,
+                               unreduced_grad_grad_grad_op], [1, 2, 3], True, rtol=1e-3, atol=1e-5)
+
+def autograd_grad_ex(heads, variables, head_grads=None, retain_graph=None, create_graph=False,
+                     train_mode=True):
+    """If some variables are not on the path that computes heads, set their gradients to zero
+    instead of raising an exception.
+
+    autograd.grad requires the user to know which variables are involved in computing the heads.
+    That is fine for the first-order gradient, but for higher-order gradients the variables used
+    to compute the heads may not be used to compute their higher-order gradients. It is impossible
+    to ask the user to know the formulas of every order of gradient.
+
+    E.g. we use such code to compute the 2nd-order gradient:
+        with autograd.record():
+            z = op(x, y)
+            head_grad = nd.ones_like(z)
+            dz_dx, _ = autograd.grad(heads=z, variables=[x, y], head_grads=nd.ones_like(z),
+                                     create_graph=True, retain_graph=True)
+            d2z_d2x, _ = autograd.grad(heads=dz_dx, variables=[x, y], head_grads=nd.ones_like(dz_dx),
+                                       create_graph=True, retain_graph=True)
+    If z = x * y, then because d2z_d2x = 0, MXNet reports that the input is unreachable from the
+    output. In that case it seems more reasonable for MXNet to return zeros.
+    """
+    # xxx: only consider one head currently
+    argument_names = autograd.get_symbol(heads).list_arguments()
+
+    # XXX: in some cases a variable may have more than one output, so we would need another way
+    # to get the variable's name. But in the unit tests it is fine.
+    variable_names = [autograd.get_symbol(variable).list_outputs()[0] for variable in variables]
+    involved_variable_indexes = []
+    involved_variables = []
+    for i in range(0, len(variables)):
+        if variable_names[i] in argument_names:
+            involved_variables.append(variables[i])
+            involved_variable_indexes.append(i)
+
+    if involved_variables:
+        partial_grads = autograd.grad(heads, involved_variables, head_grads, retain_graph, create_graph, train_mode)
+    else:
+        partial_grads = []
+
+    grads = []
+    partial_grads_index = 0
+    for i in range(0, len(variables)):
+        if i in involved_variable_indexes:
+            grads.append(partial_grads[partial_grads_index])
+            partial_grads_index += 1
+        else:
+            grads.append(nd.zeros_like(variables[i]))
+    return grads
+
+
+def check_nth_order_binary(inputs, op, grad_ops, orders, broadcast_op=False, rtol=None, atol=None):
+    """Assert n-th order autograd gradient against expected gradient.
+
+    Multiple orders of gradients can be checked by passing a list of
+    functions computing the particular order gradient and the corresponding list of orders.
+
+    Note
+    ----
+    1. Orders should always be monotonically increasing.
+    2. Elements of grad_ops should correspond to elements of orders,
+       i.e. grad_ops = [grad_op, grad_grad_grad_op] should be passed with
+       orders = [1, 3]
+
+    Parameters
+    ----------
+    inputs : tuple of mxnet.NDArray (x, y)
+        Input Array.
+    op : Callable (x,y) -> z
+        Operation to perform on Input Array.
+    grad_ops : Callable or List of Callable
+        Function (x,y) -> (n_grad_x, n_grad_y) to compute and assert gradient of given order.
+    orders : int or List of int
+        Order/s to assert expected and computed gradients.
+
+    Returns
+    -------
+    None
+
+    """
+    if isinstance(orders, int):
+        orders = [orders]
+        grad_ops = [grad_ops]
+
+    assert all(i < j for i, j in zip(orders[0:-1], orders[1:])), \
+        "orders should be monotonically increasing"
+    assert len(set(orders)) == len(orders), \
+        "orders should have unique elements"
+    highest_order = max(orders)
+
+    inputs = [nd.array(input) for input in inputs]
+    for input in inputs:
+        input.attach_grad()
+
+    expected_grads = [grad_op(inputs) for grad_op in grad_ops]
+    computed_grads = []
+    head_grads = [[]]
+
+    # Perform compute.
+    with autograd.record():
+        z = op(inputs)
+        heads = [z for _ in inputs]
+        for current_order in range(1, highest_order+1):
+            grads = []
+            new_head_grads = []
+            new_heads = []
+            for i in range(0, len(heads)):
+                head = heads[i]
+                head_grad = nd.random.normal(shape=head.shape)
+                new_head_grads.append(head_grad)
+                grads.append(autograd_grad_ex(heads=head, variables=inputs, head_grads=head_grad,
+                                              create_graph=True, retain_graph=True)[i])
+            # If we only used autograd.grad once with head_grads = head_grad in every iteration,
+            # then in the i-th iteration we would use head = derivative_(i-1) * head_grad_(i-1),
+            # but in the expected computation we use head = derivative_(i-1).
+            # Why does it work in check_nth_order_unary?
+            # Because most operators define the gradient of the first gradient
+            # (derivative_(1) * head_grad_(1)) of the function, and in that gradient function they
+            # manually define derivative_(i-1) and use it to compute derivative_(1) * head_grad_(1).
+            # It may be a wrong approach, because the gradient of the first gradient should compute
+            # the gradient of derivative_(1) * head_grad_(1) instead of the gradient of derivative_(1).
```
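For context, the failure the `autograd_grad_ex` docstring describes is easy to reproduce with its own `z = x * y` example. The snippet below is only an illustrative sketch, not part of the PR diff:

```python
# Sketch (not from the PR): reproduce the "unreachable input" case that
# autograd_grad_ex works around.
from mxnet import nd, autograd

x = nd.array([1.0, 2.0])
y = nd.array([3.0, 4.0])
x.attach_grad()
y.attach_grad()
with autograd.record():
    z = x * y
    dz_dx, dz_dy = autograd.grad(heads=z, variables=[x, y], head_grads=nd.ones_like(z),
                                 create_graph=True, retain_graph=True)
    # dz_dx is just y, so x no longer appears in the graph that produced it.
    # Asking for the second-order gradient therefore reports x as unreachable
    # instead of returning zeros, which is what autograd_grad_ex papers over:
    # autograd.grad(heads=dz_dx, variables=[x, y], head_grads=nd.ones_like(dz_dx),
    #               create_graph=True, retain_graph=True)
```

With `autograd_grad_ex`, the same request simply yields `nd.zeros_like(x)` for `x`, while `y` still gets its proper gradient.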
Review comment:
I rechecked the code. You are right, `check_nth_order_unary` is correct, because of
https://github.com/apache/incubator-mxnet/blob/8c76631caa5f349aef5d515ec95a64d2954dbcef/tests/python/unittest/test_higher_order_grad.py#L564-L566
Thx!
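As a side note for readers, the bookkeeping discussed in the comment block above can be seen on a tiny unary example. This is a hypothetical sketch of the relationship, not the code at the linked lines: when every `autograd.grad` call is fed a fresh random `head_grad`, the i-th result equals the analytic i-th derivative times the product of all head grads used so far, so the expected values have to be scaled by that product.

```python
# Sketch (hypothetical example): chaining autograd.grad with a random head_grad
# at every order scales the computed result by the product of those head grads,
# so the analytic derivative has to be scaled the same way before comparing.
from mxnet import nd, autograd

x = nd.random.normal(shape=(3,))
x.attach_grad()
with autograd.record():
    head = nd.sin(x)
    scale = nd.ones_like(x)
    # analytic 1st and 2nd derivatives of sin(x)
    for expected_derivative in [nd.cos(x), -nd.sin(x)]:
        head_grad = nd.random.normal(shape=x.shape)
        head = autograd.grad(heads=head, variables=[x], head_grads=head_grad,
                             create_graph=True, retain_graph=True)[0]
        scale = scale * head_grad
        assert nd.max(nd.abs(head - expected_derivative * scale)).asscalar() < 1e-4
```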